diff --git a/.github/workflows/build-cov.yaml b/.github/workflows/build-cov.yaml index bd0c4b8e6..46ff4e104 100644 --- a/.github/workflows/build-cov.yaml +++ b/.github/workflows/build-cov.yaml @@ -9,18 +9,43 @@ on: branches: - 'main' jobs: - check-license: - name: "Check License" + prepare: + name: "Prepare" runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + cache: maven + - name: Install without tests + run: mvn -q -DskipTests install + + license: + name: "License Check" + needs: prepare + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + cache: maven - name: Check License Header uses: apache/skywalking-eyes/header@main - name: Check Dependencies' License uses: apache/skywalking-eyes/dependency@main - build: + env: + MAVEN_OPTS: -Xmx1g + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + test: name: "BifroMQ Code Coverage Test" - needs: check-license + needs: license runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/build-dev.yaml b/.github/workflows/build-dev.yaml index c7bc6a053..5521fd53a 100644 --- a/.github/workflows/build-dev.yaml +++ b/.github/workflows/build-dev.yaml @@ -14,18 +14,43 @@ on: - 'bugfix-**' - 'fix-**' jobs: - check-license: - name: "Check License" + prepare: + name: "Prepare" runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + cache: maven + - name: Install without tests + run: mvn -q -DskipTests install + + license: + name: "License Check" + needs: prepare + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + java-version: '17' + distribution: 'temurin' + cache: maven - name: Check 
License Header uses: apache/skywalking-eyes/header@main - name: Check Dependencies' License uses: apache/skywalking-eyes/dependency@main - build: + env: + MAVEN_OPTS: -Xmx1g + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + test: name: "Build BifroMQ" - needs: check-license + needs: license runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/.licenserc.yaml b/.licenserc.yaml index f41f504bb..8ecf5a4d2 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -25,6 +25,9 @@ header: - '.gitignore' - '.gitattributes' - 'lombok.config' + - 'META-INF/**' + - '**/META-INF/**' + - '**/resources/META-INF/**' - '**/*.crt' - '**/*.pem' - '**/MANIFEST.MF' diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java index 25868a209..c2033ef5d 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java @@ -381,6 +381,11 @@ private void handleJoin(Join join) { messenger.send(ClusterMessage.newBuilder() .setJoin(Join.newBuilder().setMember(local).build()) .build(), getMemberAddress(joinMember.getEndpoint()), true); + } else if (newMember) { + // send back a join to speed up convergence + messenger.send(ClusterMessage.newBuilder() + .setJoin(Join.newBuilder().setMember(local).build()) + .build(), getMemberAddress(joinMember.getEndpoint()), true); } } else { clearZombie(join.getExpectedHost()); diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/CRDTUtil.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/CRDTUtil.java index 679d453bb..eab8c8a55 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/CRDTUtil.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/CRDTUtil.java 
@@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster.memberlist.agent; @@ -23,10 +23,6 @@ import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.ormap; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; -import org.apache.bifromq.basecrdt.core.api.IMVReg; -import org.apache.bifromq.basecrdt.core.api.IORMap; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.protobuf.InvalidProtocolBufferException; @@ -41,6 +37,10 @@ import lombok.NoArgsConstructor; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; +import org.apache.bifromq.basecrdt.core.api.IMVReg; +import org.apache.bifromq.basecrdt.core.api.IORMap; @Slf4j @NoArgsConstructor(access = AccessLevel.PRIVATE) @@ -56,7 +56,8 @@ static Map toAgentMemberMap(IORMap agentCR Iterator orMapKeyItr = agentCRDT.keys(); while (orMapKeyItr.hasNext()) { IORMap.ORMapKey orMapKey = orMapKeyItr.next(); - agentMemberMap.put(parseAgentMemberAddr(orMapKey), parseMetadata(agentCRDT.getMVReg(orMapKey.key())).get()); + Optional meta = parseMetadata(agentCRDT.getMVReg(orMapKey.key())); + meta.ifPresent(m -> agentMemberMap.put(parseAgentMemberAddr(orMapKey), m)); } return agentMemberMap; } @@ -78,11 +79,11 @@ private static Optional parseMetadata(IMVReg value) { return AgentMemberMetadata.parseFrom(data); } catch (InvalidProtocolBufferException e) { log.error("Unable to parse agent host node", e); - // this exception should not happen + // should not 
happen, skip the broken value return null; } }), Objects::nonNull)); - metaList.sort(Comparator.comparingLong(AgentMemberMetadata::getHlc)); + metaList.sort(Comparator.comparingLong(AgentMemberMetadata::getHlc).reversed()); return Optional.ofNullable(metaList.isEmpty() ? null : metaList.get(0)); } } diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/messenger/Messenger.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/messenger/Messenger.java index 7a6e6c3ec..7af1d6ecd 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/messenger/Messenger.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/messenger/Messenger.java @@ -14,17 +14,11 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster.messenger; -import org.apache.bifromq.basecluster.messenger.proto.DirectMessage; -import org.apache.bifromq.basecluster.messenger.proto.GossipMessage; -import org.apache.bifromq.basecluster.messenger.proto.MessengerMessage; -import org.apache.bifromq.basecluster.proto.ClusterMessage; -import org.apache.bifromq.basecluster.transport.ITransport; -import org.apache.bifromq.basecluster.util.RandomUtils; import com.google.common.collect.Maps; import com.google.protobuf.InvalidProtocolBufferException; import io.micrometer.core.instrument.Counter; @@ -46,14 +40,16 @@ import java.util.stream.Collectors; import lombok.Builder; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecluster.messenger.proto.DirectMessage; +import org.apache.bifromq.basecluster.messenger.proto.GossipMessage; +import org.apache.bifromq.basecluster.messenger.proto.MessengerMessage; +import org.apache.bifromq.basecluster.proto.ClusterMessage; +import org.apache.bifromq.basecluster.transport.ITransport; +import 
org.apache.bifromq.basecluster.util.RandomUtils; +import org.apache.bifromq.baseenv.ZeroCopyParser; @Slf4j public class Messenger implements IMessenger { - private enum State { - INIT, START, STOP - } - - private State state = State.INIT; // threshold for determine which transport to use private final MessengerTransport transport; private final CompositeDisposable disposables = new CompositeDisposable(); @@ -61,11 +57,10 @@ private enum State { private final Gossiper gossiper; private final Subject> publisher = PublishSubject.>create().toSerialized(); - private final Scheduler scheduler; private final MessengerOptions opts; private final MetricManager metricManager; - + private State state = State.INIT; @Builder private Messenger(ITransport transport, Scheduler scheduler, MessengerOptions opts) { this.transport = new MessengerTransport(transport); @@ -202,8 +197,8 @@ private void onMessengerMessage(Timed timedMessageEnve switch (messengerMessageEnvelope.message.getMessengerMessageTypeCase()) { case DIRECT: try { - ClusterMessage clusterMessage = - ClusterMessage.parseFrom(messengerMessageEnvelope.message.getDirect().getPayload()); + ClusterMessage clusterMessage = ZeroCopyParser.parse( + messengerMessageEnvelope.message.getDirect().getPayload(), ClusterMessage.parser()); log.trace("Received message: sender={}, message={}", messengerMessageEnvelope.sender, clusterMessage); metricManager.msgRecvCounters.get(clusterMessage.getClusterMessageTypeCase()).increment(); @@ -244,6 +239,10 @@ private void onError(Throwable throwable) { log.error("Received unexpected error:", throwable); } + private enum State { + INIT, START, STOP + } + private static class MetricManager { final Map msgSendCounters = Maps.newHashMap(); final Map msgRecvCounters = Maps.newHashMap(); @@ -253,17 +252,19 @@ private static class MetricManager { MetricManager(InetSocketAddress localAddress) { for (ClusterMessage.ClusterMessageTypeCase typeCase : ClusterMessage.ClusterMessageTypeCase.values()) { - 
Tags tags = Tags - .of("local", localAddress.getAddress().getHostAddress() + ":" + localAddress.getPort()) - .and("type", typeCase.name()); - msgSendCounters.put(typeCase, - Metrics.counter("basecluster.send.count", tags)); - msgRecvCounters.put(typeCase, - Metrics.counter("basecluster.recv.count", tags)); - gossipGenCounters.put(typeCase, - Metrics.counter("basecluster.gossip.gen.count", tags)); - gossipHeardCounters.put(typeCase, - Metrics.counter("basecluster.gossip.heard.count", tags)); + if (typeCase != ClusterMessage.ClusterMessageTypeCase.CLUSTERMESSAGETYPE_NOT_SET) { + Tags tags = Tags + .of("local", localAddress.getAddress().getHostAddress() + ":" + localAddress.getPort()) + .and("type", typeCase.name()); + msgSendCounters.put(typeCase, + Metrics.counter("basecluster.send.count", tags)); + msgRecvCounters.put(typeCase, + Metrics.counter("basecluster.recv.count", tags)); + gossipGenCounters.put(typeCase, + Metrics.counter("basecluster.gossip.gen.count", tags)); + gossipHeardCounters.put(typeCase, + Metrics.counter("basecluster.gossip.heard.count", tags)); + } } } diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/messenger/MessengerTransport.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/messenger/MessengerTransport.java index 3bda3c546..0afbfe5d6 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/messenger/MessengerTransport.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/messenger/MessengerTransport.java @@ -33,6 +33,7 @@ import org.apache.bifromq.basecluster.messenger.proto.MessengerMessage; import org.apache.bifromq.basecluster.transport.ITransport; import org.apache.bifromq.basecluster.transport.PacketEnvelope; +import org.apache.bifromq.baseenv.ZeroCopyParser; @Slf4j final class MessengerTransport { @@ -62,9 +63,11 @@ Observable> receive() { private Observable> convert(PacketEnvelope packetEnvelope) { return Observable.fromIterable(packetEnvelope.data.stream().map(b -> { 
try { + // Parse with aliasing directly from ByteString to reduce copies + MessengerMessage msg = ZeroCopyParser.parse(b, MessengerMessage.parser()); MessengerMessageEnvelope mmEnvelop = MessengerMessageEnvelope.builder() .recipient(packetEnvelope.recipient) - .message(MessengerMessage.parseFrom(b)) + .message(msg) .sender(packetEnvelope.sender) .build(); return new Timed<>(mmEnvelop, System.currentTimeMillis(), TimeUnit.MILLISECONDS); diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/transport/UDPTransport.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/transport/UDPTransport.java index 163fdcaf9..c133f1b76 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/transport/UDPTransport.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/transport/UDPTransport.java @@ -14,14 +14,11 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster.transport; -import org.apache.bifromq.basecluster.transport.proto.Packet; -import org.apache.bifromq.baseenv.NettyEnv; -import org.apache.bifromq.basehlc.HLC; import com.google.protobuf.InvalidProtocolBufferException; import io.micrometer.core.instrument.Counter; import io.micrometer.core.instrument.DistributionSummary; @@ -49,6 +46,10 @@ import lombok.Builder; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecluster.transport.proto.Packet; +import org.apache.bifromq.baseenv.NettyEnv; +import org.apache.bifromq.baseenv.ZeroCopyParser; +import org.apache.bifromq.basehlc.HLC; @Slf4j public final class UDPTransport extends AbstractTransport { @@ -151,7 +152,8 @@ protected void channelRead0(ChannelHandlerContext ctx, DatagramPacket dp) { try { byte[] data = new byte[dp.content().readableBytes()]; dp.content().readBytes(data); - Packet packet = Packet.parseFrom(data); + // Parse with aliasing to avoid extra copies of bytes fields + Packet packet = ZeroCopyParser.parse(data, Packet.parser()); transportLatency.record(HLC.INST.getPhysical(packet.getHlc() - HLC.INST.get())); doReceive(packet, dp.sender(), dp.recipient()); } catch (InvalidProtocolBufferException e) { diff --git a/base-cluster/src/test/java/org/apache/bifromq/basecluster/memberlist/HostMemberListTest.java b/base-cluster/src/test/java/org/apache/bifromq/basecluster/memberlist/HostMemberListTest.java index 0a93cf4c2..57e63f5c8 100644 --- a/base-cluster/src/test/java/org/apache/bifromq/basecluster/memberlist/HostMemberListTest.java +++ b/base-cluster/src/test/java/org/apache/bifromq/basecluster/memberlist/HostMemberListTest.java @@ -202,6 +202,67 @@ public void handleJoin() { argThat(r -> r.getUri().equals(AGENT_HOST_MAP_URI) && r.getId().equals(LOCAL_STORE_ID)), any()); } + @Test + public void handleJoinFromNewMemberWithoutExpectedHost() { + // ensure local crdt read returns empty and allow address resolve + 
when(hostListCRDT.getMVReg(any())).thenReturn(hostMemberOnCRDT); + when(hostMemberOnCRDT.read()).thenReturn(emptyIterator()); + when(addressResolver.resolve(REMOTE_HOST_1_ENDPOINT)).thenReturn(REMOTE_ADDR_1); + + IHostMemberList memberList = new HostMemberList(LOCAL_ADDR.getHostName(), LOCAL_ADDR.getPort(), + messenger, scheduler, store, addressResolver); + + // send a normal join without expectedHost from a new member + messageSubject.onNext(joinMsg(HostMember.newBuilder() + .setEndpoint(REMOTE_HOST_1_ENDPOINT) + .setIncarnation(1) + .build())); + + // verify one direct join is sent back to the new member + ArgumentCaptor msgCap = ArgumentCaptor.forClass(ClusterMessage.class); + ArgumentCaptor addrCap = ArgumentCaptor.forClass(InetSocketAddress.class); + ArgumentCaptor reliableCap = ArgumentCaptor.forClass(Boolean.class); + verify(messenger, times(1)).send(msgCap.capture(), addrCap.capture(), reliableCap.capture()); + + assertEquals(msgCap.getValue().getJoin().getMember().getEndpoint(), LOCAL_ENDPOINT); + assertEquals(msgCap.getValue().getJoin().getMember().getIncarnation(), 0); + assertEquals(addrCap.getValue(), REMOTE_ADDR_1); + assertTrue(reliableCap.getValue()); + } + + @Test + public void handleDuplicatedJoinWithoutExpectedHost() { + // ensure local crdt read returns empty and allow address resolve + when(hostListCRDT.getMVReg(any())).thenReturn(hostMemberOnCRDT); + when(hostMemberOnCRDT.read()).thenReturn(emptyIterator()); + when(addressResolver.resolve(REMOTE_HOST_1_ENDPOINT)).thenReturn(REMOTE_ADDR_1); + + IHostMemberList memberList = new HostMemberList(LOCAL_ADDR.getHostName(), LOCAL_ADDR.getPort(), + messenger, scheduler, store, addressResolver); + + // first join from new member triggers a send-back + messageSubject.onNext(joinMsg(HostMember.newBuilder() + .setEndpoint(REMOTE_HOST_1_ENDPOINT) + .setIncarnation(1) + .build())); + + // duplicated join from the same member should not trigger another send-back + 
messageSubject.onNext(joinMsg(HostMember.newBuilder() + .setEndpoint(REMOTE_HOST_1_ENDPOINT) + .setIncarnation(1) + .build())); + + ArgumentCaptor msgCap = ArgumentCaptor.forClass(ClusterMessage.class); + ArgumentCaptor addrCap = ArgumentCaptor.forClass(InetSocketAddress.class); + ArgumentCaptor reliableCap = ArgumentCaptor.forClass(Boolean.class); + verify(messenger, times(1)).send(msgCap.capture(), addrCap.capture(), reliableCap.capture()); + + assertEquals(msgCap.getValue().getJoin().getMember().getEndpoint(), LOCAL_ENDPOINT); + assertEquals(msgCap.getValue().getJoin().getMember().getIncarnation(), 0); + assertEquals(addrCap.getValue(), REMOTE_ADDR_1); + assertTrue(reliableCap.getValue()); + } + @Test public void handleJoinAndClearZombie() { when(hostListCRDT.getMVReg(any())).thenReturn(hostMemberOnCRDT); diff --git a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java index 25eb0c825..da8bfbdbf 100644 --- a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java +++ b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java @@ -47,6 +47,7 @@ import org.apache.bifromq.basecrdt.proto.Replica; import org.apache.bifromq.basecrdt.store.ICRDTStore; import org.apache.bifromq.basecrdt.store.proto.CRDTStoreMessage; +import org.apache.bifromq.baseenv.ZeroCopyParser; import org.apache.bifromq.logger.MDCLogger; import org.slf4j.Logger; @@ -101,7 +102,9 @@ class CRDTCluster> { return; } try { - this.storeMsgSubject.onNext(CRDTStoreMessage.parseFrom(agentMessage.getPayload())); + // Parse with aliasing directly from ByteString + this.storeMsgSubject.onNext( + ZeroCopyParser.parse(agentMessage.getPayload(), CRDTStoreMessage.parser())); } catch (InvalidProtocolBufferException e) { log.error("Unable to parse crdt store message from agent message", e); } @@ -113,7 
+116,8 @@ class CRDTCluster> { if (stopped.get()) { return; } - AgentMemberAddr target = AgentMemberAddr.parseFrom(msg.getReceiver()); + // Parse receiver with aliasing directly from ByteString + AgentMemberAddr target = ZeroCopyParser.parse(msg.getReceiver(), AgentMemberAddr.parser()); ; localMembership.send(target, msg.toByteString(), true) .whenComplete((v, e) -> { if (e != null) { diff --git a/base-env/base-env-provider/src/main/java/org/apache/bifromq/baseenv/ZeroCopyParser.java b/base-env/base-env-provider/src/main/java/org/apache/bifromq/baseenv/ZeroCopyParser.java index af6cf8e19..9eb649a57 100644 --- a/base-env/base-env-provider/src/main/java/org/apache/bifromq/baseenv/ZeroCopyParser.java +++ b/base-env/base-env-provider/src/main/java/org/apache/bifromq/baseenv/ZeroCopyParser.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.baseenv; @@ -23,6 +23,7 @@ import com.google.protobuf.CodedInputStream; import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.Parser; +import com.google.protobuf.UnsafeByteOperations; /** * A utility class for parsing protocol buffer messages from a ByteString using zero-copy parsing. @@ -43,4 +44,17 @@ public static T parse(ByteString bytes, Parser parser) throws InvalidProt input.enableAliasing(true); return parser.parseFrom(input); } + + /** + * Parses a protocol buffer message from a byte array using zero-copy parsing. + * + * @param bytes The byte array to parse + * @param parser The parser for the protocol buffer message. + * @return The parsed protocol buffer message. + * + * @throws InvalidProtocolBufferException If the parsing fails. 
+ */ + public static T parse(byte[] bytes, Parser parser) throws InvalidProtocolBufferException { + return parse(UnsafeByteOperations.unsafeWrap(bytes), parser); + } } diff --git a/base-kv/base-kv-local-engine-memory/pom.xml b/base-kv/base-kv-local-engine-memory/pom.xml new file mode 100644 index 000000000..c294466c3 --- /dev/null +++ b/base-kv/base-kv-local-engine-memory/pom.xml @@ -0,0 +1,81 @@ + + + + 4.0.0 + + org.apache.bifromq + base-kv + ${revision} + + base-kv-local-engine-memory + + + + org.apache.bifromq + base-kv-local-engine-spi + + + org.apache.bifromq + base-kv-local-engine-spi + test-jar + ${project.version} + test + + + + org.awaitility + awaitility + test + + + org.mockito + mockito-core + test + + + org.openjdk.jmh + jmh-core + test + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + test + + + org.apache.logging.log4j + log4j-api + test + + + org.apache.logging.log4j + log4j-core + test + + + org.apache.logging.log4j + log4j-slf4j2-impl + test + + + diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemWALableKVSpace.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/AbstractInMemKVSpaceReader.java similarity index 51% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemWALableKVSpace.java rename to base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/AbstractInMemKVSpaceReader.java index 38b2b53c8..7d71c54b7 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemWALableKVSpace.java +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/AbstractInMemKVSpaceReader.java @@ -19,24 +19,36 @@ package org.apache.bifromq.basekv.localengine.memory; -import org.apache.bifromq.basekv.localengine.IWALableKVSpace; +import com.google.protobuf.ByteString; 
+import java.util.Map; +import java.util.NavigableMap; +import java.util.Optional; +import org.apache.bifromq.basekv.localengine.AbstractKVSpaceReader; import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import java.util.concurrent.CompletableFuture; import org.slf4j.Logger; -public class InMemWALableKVSpace extends InMemKVSpace - implements IWALableKVSpace { - protected InMemWALableKVSpace(String id, - InMemKVEngineConfigurator configurator, - InMemWALableKVEngine engine, - Runnable onDestroy, - KVSpaceOpMeters opMeters, - Logger logger) { - super(id, configurator, engine, onDestroy, opMeters, logger); +abstract class AbstractInMemKVSpaceReader extends AbstractKVSpaceReader { + protected AbstractInMemKVSpaceReader(String id, KVSpaceOpMeters readOpMeters, Logger logger) { + super(id, readOpMeters, logger); + } + + protected abstract Map metadataMap(); + + protected abstract NavigableMap rangeData(); + + @Override + protected Optional doMetadata(ByteString metaKey) { + return Optional.ofNullable(metadataMap().get(metaKey)); } @Override - public CompletableFuture flush() { - return CompletableFuture.completedFuture(System.nanoTime()); + protected boolean doExist(ByteString key) { + return rangeData().containsKey(key); } + + @Override + protected Optional doGet(ByteString key) { + return Optional.ofNullable(rangeData().get(key)); + } + } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVEngine.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVEngine.java similarity index 73% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVEngine.java rename to base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVEngine.java index 136a7cf6e..7a34557b2 100644 --- 
a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVEngine.java +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVEngine.java @@ -19,21 +19,27 @@ package org.apache.bifromq.basekv.localengine.memory; +import com.google.protobuf.Struct; import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; import org.slf4j.Logger; -public class InMemCPableKVEngine extends InMemKVEngine { - public InMemCPableKVEngine(String overrideIdentity, InMemKVEngineConfigurator c) { - super(overrideIdentity, c); +class InMemCPableKVEngine extends InMemKVEngine { + InMemCPableKVEngine(String overrideIdentity, Struct conf) { + super(overrideIdentity, conf); } @Override protected InMemCPableKVSpace doBuildKVSpace(String spaceId, - InMemKVEngineConfigurator configurator, + Struct conf, Runnable onDestroy, KVSpaceOpMeters opMeters, Logger logger, String... tags) { - return new InMemCPableKVSpace(spaceId, configurator, this, onDestroy, opMeters, logger, tags); + return new InMemCPableKVSpace(spaceId, conf, this, onDestroy, opMeters, logger, tags); + } + + @Override + protected Struct defaultConf() { + return InMemDefaultConfigs.CP; } } diff --git a/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVSpace.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVSpace.java new file mode 100644 index 000000000..60368367b --- /dev/null +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVSpace.java @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.memory; + +import static org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters.getGauge; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import io.micrometer.core.instrument.Gauge; +import io.micrometer.core.instrument.Tags; +import java.util.Optional; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceCheckpoint; +import org.apache.bifromq.basekv.localengine.IKVSpaceMigratableWriter; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.apache.bifromq.basekv.localengine.RestoreMode; +import org.apache.bifromq.basekv.localengine.metrics.GeneralKVSpaceMetric; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.slf4j.Logger; + +class InMemCPableKVSpace extends InMemKVSpace + implements ICPableKVSpace { + private final Cache checkpoints; + private final Gauge checkpointGauge; + private final AtomicReference activeEpoch; + private volatile InMemKVSpaceCheckpoint latestCheckpoint; + + protected InMemCPableKVSpace(String id, + Struct conf, + InMemCPableKVEngine 
engine, + Runnable onDestroy, + KVSpaceOpMeters opMeters, + Logger logger, + String... tags) { + super(id, conf, engine, onDestroy, opMeters, logger, tags); + activeEpoch = new AtomicReference<>(new InMemKVSpaceEpoch()); + checkpoints = Caffeine.newBuilder().weakValues().build(); + checkpointGauge = getGauge(id, GeneralKVSpaceMetric.CheckpointNumGauge, checkpoints::estimatedSize, + Tags.of(tags)); + } + + @Override + public String checkpoint() { + synchronized (this) { + return metadataRefresher.call(() -> { + String cpId = UUID.randomUUID().toString(); + latestCheckpoint = new InMemKVSpaceCheckpoint(id, cpId, activeEpoch.get(), opMeters, logger); + checkpoints.put(cpId, latestCheckpoint); + return cpId; + }); + } + } + + @Override + public Optional openCheckpoint(String checkpointId) { + return Optional.ofNullable(checkpoints.getIfPresent(checkpointId)); + } + + @Override + public IRestoreSession startRestore(IRestoreSession.FlushListener flushListener) { + return new RestoreSession(RestoreMode.Replace, flushListener); + } + + @Override + public IRestoreSession startReceiving(IRestoreSession.FlushListener flushListener) { + return new RestoreSession(RestoreMode.Overlay, flushListener); + } + + @Override + protected void doClose() { + checkpointGauge.close(); + } + + @Override + protected void doDestroy() { + activeEpoch.set(new InMemKVSpaceEpoch()); + } + + @Override + protected InMemKVSpaceEpoch handle() { + return activeEpoch.get(); + } + + @Override + protected void doOpen() { + + } + + @Override + public IKVSpaceMigratableWriter toWriter() { + return new InMemKVSpaceMigratableWriter<>(id, activeEpoch.get(), engine, syncContext, metadataUpdated -> { + if (metadataUpdated) { + this.loadMetadata(); + } + }, impact -> { + // Update tracked boundary sizes on write impact + this.tracker.updateOnWrite(impact, activeEpoch.get().dataMap()); + }, opMeters, logger); + } + + private class RestoreSession implements IRestoreSession { + private final InMemKVSpaceEpoch 
staging; + private final IRestoreSession.FlushListener flushListener; + private final AtomicBoolean closed = new AtomicBoolean(); + private int ops = 0; + private long bytes = 0; + + private RestoreSession(RestoreMode mode, FlushListener flushListener) { + switch (mode) { + case Overlay -> staging = new InMemKVSpaceEpoch(activeEpoch.get()); + case Replace -> staging = new InMemKVSpaceEpoch(); + default -> throw new IllegalArgumentException("Unsupported restore mode: " + mode); + } + this.flushListener = flushListener; + } + + private void ensureOpen() { + if (closed.get()) { + throw new IllegalStateException("Restore session already closed"); + } + } + + @Override + public IRestoreSession put(ByteString key, ByteString value) { + ensureOpen(); + staging.putData(key, value); + ops++; + bytes += key.size() + value.size(); + return this; + } + + @Override + public IRestoreSession metadata(ByteString metaKey, ByteString metaValue) { + ensureOpen(); + staging.setMetadata(metaKey, metaValue); + ops++; + bytes += metaKey.size() + metaValue.size(); + return this; + } + + @Override + public void done() { + ensureOpen(); + if (closed.compareAndSet(false, true)) { + // Replace mode ignores existing state; Overlay mode applies on top of current state + syncContext().mutator().run(() -> { + activeEpoch.set(staging); + // Epoch changed, invalidate tracked boundary sizes + tracker.invalidateAll(); + if (flushListener != null) { + flushListener.onFlush(ops, bytes); + } + loadMetadata(); + // InMemCPableKVSpace is not generation aware + return true; + }); + } + } + + @Override + public void abort() { + if (closed.compareAndSet(false, true)) { + // no-op + } + } + + @Override + public int count() { + return ops; + } + } +} diff --git a/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemDefaultConfigs.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemDefaultConfigs.java new file 
mode 100644 index 000000000..7f9a36fa4 --- /dev/null +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemDefaultConfigs.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.memory; + +import com.google.protobuf.Struct; + +/** + * Default configuration constants for in-memory engines. 
+ */ +public final class InMemDefaultConfigs { + public static final Struct CP = Struct.newBuilder().build(); + public static final Struct WAL = CP; + + private InMemDefaultConfigs() { + } +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngine.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngine.java similarity index 89% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngine.java rename to base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngine.java index b5d77ad97..9a03c2974 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngine.java +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngine.java @@ -19,15 +19,16 @@ package org.apache.bifromq.basekv.localengine.memory; -import org.apache.bifromq.basekv.localengine.AbstractKVEngine; +import com.google.protobuf.Struct; import java.util.UUID; +import org.apache.bifromq.basekv.localengine.AbstractKVEngine; abstract class InMemKVEngine, T extends InMemKVSpace> - extends AbstractKVEngine { + extends AbstractKVEngine { private final String identity; - public InMemKVEngine(String overrideIdentity, InMemKVEngineConfigurator c) { - super(overrideIdentity, c); + public InMemKVEngine(String overrideIdentity, Struct conf) { + super(overrideIdentity, conf); if (overrideIdentity != null && !overrideIdentity.trim().isEmpty()) { identity = overrideIdentity; } else { diff --git a/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngineProvider.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngineProvider.java new file mode 100644 index 000000000..b66d3c798 --- /dev/null 
+++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngineProvider.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.memory; + +import com.google.protobuf.Struct; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVEngine; +import org.apache.bifromq.basekv.localengine.IWALableKVSpace; +import org.apache.bifromq.basekv.localengine.spi.IKVEngineProvider; + +/** + * Provider for in-memory engine implementation. 
+ */ +public class InMemKVEngineProvider implements IKVEngineProvider { + + @Override + public String type() { + return "memory"; + } + + @Override + public IKVEngine createCPable(String overrideIdentity, Struct conf) { + return new InMemCPableKVEngine(overrideIdentity, conf); + } + + @Override + public IKVEngine createWALable(String overrideIdentity, Struct conf) { + return new InMemWALableKVEngine(overrideIdentity, conf); + } + + @Override + public Struct defaultsForCPable() { + return InMemDefaultConfigs.CP; + } + + @Override + public Struct defaultsForWALable() { + return InMemDefaultConfigs.WAL; + } +} diff --git a/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVHelper.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVHelper.java new file mode 100644 index 000000000..cc23decad --- /dev/null +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVHelper.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.memory; + +import com.google.protobuf.ByteString; +import java.util.Map; +import java.util.NavigableMap; +import java.util.SortedMap; +import org.apache.bifromq.basekv.proto.Boundary; + +public class InMemKVHelper { + public static long sizeOfRange(NavigableMap rangeData, Boundary boundary) { + SortedMap sizedData; + if (!boundary.hasStartKey() && !boundary.hasEndKey()) { + sizedData = rangeData; + } else if (!boundary.hasStartKey()) { + sizedData = rangeData.headMap(boundary.getEndKey()); + } else if (!boundary.hasEndKey()) { + sizedData = rangeData.tailMap(boundary.getStartKey()); + } else { + sizedData = rangeData.subMap(boundary.getStartKey(), boundary.getEndKey()); + } + long sum = 0L; + for (Map.Entry e : sizedData.entrySet()) { + sum += e.getKey().size() + e.getValue().size(); + } + return sum; + } +} diff --git a/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpace.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpace.java new file mode 100644 index 000000000..1f7057e6a --- /dev/null +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpace.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.memory; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import java.util.Collections; +import java.util.Map; +import java.util.NavigableMap; +import java.util.concurrent.ConcurrentSkipListMap; +import org.apache.bifromq.basekv.localengine.AbstractKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; +import org.apache.bifromq.basekv.localengine.ISyncContext; +import org.apache.bifromq.basekv.localengine.SyncContext; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.utils.BoundaryUtil; +import org.slf4j.Logger; + +abstract class InMemKVSpace< + E extends InMemKVEngine, + T extends InMemKVSpace> extends AbstractKVSpace { + protected final E engine; + protected final ISyncContext syncContext = new SyncContext(); + protected final ISyncContext.IRefresher metadataRefresher = syncContext.refresher(); + protected final TrackedBoundaryIndex tracker = new TrackedBoundaryIndex(); + + protected InMemKVSpace(String id, + Struct conf, + E engine, + Runnable onDestroy, + KVSpaceOpMeters opMeters, + Logger logger, + String... 
tags) { + super(id, onDestroy, opMeters, logger, tags); + this.engine = engine; + } + + ISyncContext syncContext() { + return syncContext; + } + + @Override + public IKVSpaceRefreshableReader reader() { + return new InMemKVSpaceReader(id, opMeters, logger, syncContext.refresher(), this::handle, tracker); + } + + protected void loadMetadata() { + metadataRefresher.runIfNeeded((genBumped) -> { + if (!handle().metadataMap().isEmpty()) { + updateMetadata(Collections.unmodifiableMap(handle().metadataMap())); + } + }); + } + + protected long doSize(Boundary boundary) { + if (!boundary.hasStartKey() && !boundary.hasEndKey()) { + return handle().totalDataBytes(); + } + // Track boundary size lazily and keep it updated on writes + return tracker.sizeOrTrack(handle(), boundary); + } + + static final class TrackedBoundaryIndex { + private static final int MAX_TRACKED = 1024; + private final NavigableMap buckets = + new ConcurrentSkipListMap<>(BoundaryUtil::compare); + + long sizeOrTrack(InMemKVSpaceEpoch epoch, Boundary boundary) { + TrackedBucket b = buckets.get(boundary); + if (b != null) { + b.touch(); + return b.bytes; + } + long sized = InMemKVHelper.sizeOfRange(epoch.dataMap(), boundary); + if (buckets.size() >= MAX_TRACKED) { + buckets.pollFirstEntry(); + } + buckets.put(boundary, new TrackedBucket(sized)); + return sized; + } + + void updateOnWrite(InMemKVSpaceWriterHelper.WriteImpact impact, NavigableMap data) { + if (impact == null || buckets.isEmpty()) { + return; + } + Map delta = impact.pointDeltaBytes(); + if (delta == null || delta.isEmpty()) { + return; + } + final long now = System.nanoTime(); + for (Map.Entry e : buckets.entrySet()) { + final Boundary tracked = e.getKey(); + final TrackedBucket bucket = e.getValue(); + long deltaSum = 0L; + // accumulate per-key delta inside the boundary + for (Map.Entry de : delta.entrySet()) { + if (BoundaryUtil.inRange(de.getKey(), tracked)) { + deltaSum += de.getValue(); + } + } + if (deltaSum != 0L) { + long newBytes = 
bucket.bytes + deltaSum; + bucket.bytes = Math.max(newBytes, 0L); + bucket.lastAccessNanos = now; + } + } + } + + void invalidateAll() { + buckets.clear(); + } + + private static final class TrackedBucket { + volatile long bytes; + volatile long lastAccessNanos; + + TrackedBucket(long bytes) { + this.bytes = bytes; + this.lastAccessNanos = System.nanoTime(); + } + + void touch() { + lastAccessNanos = System.nanoTime(); + } + } + } +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceCheckpoint.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceCheckpoint.java similarity index 61% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceCheckpoint.java rename to base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceCheckpoint.java index 8e910c16d..4d9d9a3a9 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceCheckpoint.java +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceCheckpoint.java @@ -14,33 +14,33 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.localengine.memory; import org.apache.bifromq.basekv.localengine.IKVSpaceCheckpoint; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import com.google.protobuf.ByteString; -import java.util.Map; -import java.util.concurrent.ConcurrentSkipListMap; import org.slf4j.Logger; -class InMemKVSpaceCheckpoint extends InMemKVSpaceReader implements IKVSpaceCheckpoint { +class InMemKVSpaceCheckpoint implements IKVSpaceCheckpoint { + private final String id; + private final KVSpaceOpMeters opMeters; + private final Logger logger; private final String cpId; - private final Map metadataMap; - private final ConcurrentSkipListMap rangeData; + private final InMemKVSpaceEpoch checkpoint; protected InMemKVSpaceCheckpoint(String id, String cpId, - Map metadataMap, - ConcurrentSkipListMap rangeData, + InMemKVSpaceEpoch checkpoint, KVSpaceOpMeters opMeters, Logger logger) { - super(id, opMeters, logger); + this.id = id; + this.opMeters = opMeters; + this.logger = logger; this.cpId = cpId; - this.metadataMap = metadataMap; - this.rangeData = rangeData; + this.checkpoint = checkpoint; } @Override @@ -49,12 +49,7 @@ public String cpId() { } @Override - protected Map metadataMap() { - return metadataMap; - } - - @Override - protected ConcurrentSkipListMap rangeData() { - return rangeData; + public IKVSpaceReader newReader() { + return new InMemKVSpaceCheckpointReader(id, opMeters, logger, checkpoint); } } diff --git a/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceCheckpointReader.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceCheckpointReader.java new file mode 100644 index 000000000..8c56c4770 --- /dev/null +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceCheckpointReader.java @@ 
-0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.memory; + +import static org.apache.bifromq.basekv.localengine.memory.InMemKVHelper.sizeOfRange; + +import com.google.protobuf.ByteString; +import java.util.Map; +import java.util.NavigableMap; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.proto.Boundary; +import org.slf4j.Logger; + +class InMemKVSpaceCheckpointReader extends AbstractInMemKVSpaceReader { + private final InMemKVSpaceEpoch checkpoint; + + protected InMemKVSpaceCheckpointReader(String id, + KVSpaceOpMeters readOpMeters, + Logger logger, + InMemKVSpaceEpoch checkpoint) { + super(id, readOpMeters, logger); + this.checkpoint = checkpoint; + } + + @Override + protected Map metadataMap() { + return checkpoint.metadataMap(); + } + + @Override + protected NavigableMap rangeData() { + return checkpoint.dataMap(); + } + + @Override + protected IKVSpaceIterator doNewIterator(Boundary subBoundary) { + return new InMemKVSpaceIterator(rangeData(), subBoundary); + } + + @Override + protected long doSize(Boundary boundary) { + 
return sizeOfRange(checkpoint.dataMap(), boundary); + } + + @Override + public void close() { + + } +} diff --git a/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceEpoch.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceEpoch.java new file mode 100644 index 000000000..e472d9a32 --- /dev/null +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceEpoch.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.memory; + +import static com.google.protobuf.ByteString.unsignedLexicographicalComparator; + +import com.google.protobuf.ByteString; +import java.util.Map; +import java.util.NavigableMap; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.bifromq.basekv.localengine.IKVSpaceEpoch; +import org.pcollections.HashPMap; +import org.pcollections.HashTreePMap; +import org.pcollections.TreePMap; + +class InMemKVSpaceEpoch implements IKVSpaceEpoch { + private final AtomicReference> metadataMap; + private final AtomicReference> dataMap; + private final AtomicLong totalDataBytes; + + InMemKVSpaceEpoch() { + metadataMap = new AtomicReference<>(HashTreePMap.empty()); + dataMap = new AtomicReference<>(TreePMap.empty(unsignedLexicographicalComparator())); + totalDataBytes = new AtomicLong(0); + } + + InMemKVSpaceEpoch(InMemKVSpaceEpoch overlay) { + metadataMap = new AtomicReference<>(overlay.metadataMap.get()); + dataMap = new AtomicReference<>(overlay.dataMap.get()); + totalDataBytes = new AtomicLong(overlay.totalDataBytes.get()); + } + + Map metadataMap() { + return metadataMap.get(); + } + + NavigableMap dataMap() { + return dataMap.get(); + } + + long totalDataBytes() { + return totalDataBytes.get(); + } + + void setMetadata(ByteString key, ByteString value) { + metadataMap.updateAndGet(m -> m.plus(key, value)); + } + + void removeMetadata(ByteString key) { + metadataMap.updateAndGet(m -> m.minus(key)); + } + + void putData(ByteString key, ByteString value) { + TreePMap current = dataMap.get(); + ByteString old = current.get(key); + long oldBytes = old == null ? 
0 : (long) key.size() + old.size(); + long newBytes = (long) key.size() + value.size(); + totalDataBytes.addAndGet(newBytes - oldBytes); + dataMap.set(current.plus(key, value)); + } + + void removeData(ByteString key) { + TreePMap current = dataMap.get(); + ByteString old = current.get(key); + if (old != null) { + totalDataBytes.addAndGet(-((long) key.size() + old.size())); + dataMap.set(current.minus(key)); + } + } +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceIterator.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceIterator.java similarity index 51% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceIterator.java rename to base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceIterator.java index 2a9217c1d..65ffbd386 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceIterator.java +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceIterator.java @@ -14,32 +14,36 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.localengine.memory; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.proto.Boundary; import com.google.protobuf.ByteString; import java.util.Map; -import java.util.concurrent.ConcurrentNavigableMap; -import java.util.concurrent.ConcurrentSkipListMap; +import java.util.NavigableMap; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.proto.Boundary; class InMemKVSpaceIterator implements IKVSpaceIterator { - private final ConcurrentSkipListMap origData; private final Boundary boundary; + private final AtomicReference> currentEpoch = new AtomicReference<>(); + private final CloseListener closeListener; private Map.Entry currentEntry; - private ConcurrentNavigableMap dataSource; + private NavigableMap dataSource; - public InMemKVSpaceIterator(ConcurrentSkipListMap data) { - this(data, Boundary.getDefaultInstance()); + public InMemKVSpaceIterator(NavigableMap epoch, Boundary boundary) { + this(epoch, boundary, iterator -> { + }); } - public InMemKVSpaceIterator(ConcurrentSkipListMap data, Boundary boundary) { - origData = data; + public InMemKVSpaceIterator(NavigableMap epoch, + Boundary boundary, + CloseListener closeListener) { this.boundary = boundary; - refresh(); + this.closeListener = closeListener; + refresh(epoch); } @Override @@ -59,51 +63,68 @@ public boolean isValid() { @Override public void next() { - currentEntry = dataSource.higherEntry(currentEntry.getKey()); + currentEntry = dataSource().higherEntry(currentEntry.getKey()); } @Override public void prev() { - currentEntry = dataSource.lowerEntry(currentEntry.getKey()); + currentEntry = dataSource().lowerEntry(currentEntry.getKey()); } @Override public void seekToFirst() { - currentEntry = dataSource.firstEntry(); + if (dataSource().isEmpty()) { + currentEntry = null; + return; + } + currentEntry = dataSource().firstEntry(); 
} @Override public void seekToLast() { - currentEntry = dataSource.lastEntry(); + if (dataSource().isEmpty()) { + currentEntry = null; + return; + } + currentEntry = dataSource().lastEntry(); } @Override public void seek(ByteString target) { - currentEntry = dataSource.ceilingEntry(target); + currentEntry = dataSource().ceilingEntry(target); } @Override public void seekForPrev(ByteString target) { - currentEntry = dataSource.floorEntry(target); + currentEntry = dataSource().floorEntry(target); } - @Override - public void refresh() { - ConcurrentSkipListMap data = origData.clone(); + public void refresh(NavigableMap epoch) { + currentEntry = null; + currentEpoch.set(epoch); + NavigableMap data = currentEpoch.get(); if (!boundary.hasStartKey() && !boundary.hasEndKey()) { dataSource = data; } else if (!boundary.hasStartKey()) { - dataSource = data.headMap(boundary.getEndKey()); + dataSource = data.headMap(boundary.getEndKey(), false); } else if (!boundary.hasEndKey()) { - dataSource = data.tailMap(boundary.getStartKey()); + dataSource = data.tailMap(boundary.getStartKey(), true); } else { - dataSource = data.subMap(boundary.getStartKey(), boundary.getEndKey()); + dataSource = data.subMap(boundary.getStartKey(), true, boundary.getEndKey(), false); } - currentEntry = dataSource.firstEntry(); + } + + private NavigableMap dataSource() { + return dataSource; } @Override public void close() { currentEntry = null; + closeListener.onClose(this); + } + + interface CloseListener { + void onClose(InMemKVSpaceIterator iterator); } } diff --git a/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceMigratableWriter.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceMigratableWriter.java new file mode 100644 index 000000000..06992c063 --- /dev/null +++ 
b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceMigratableWriter.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.memory; + +import java.util.function.Consumer; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.IKVSpaceMigratableWriter; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.apache.bifromq.basekv.localengine.ISyncContext; +import org.apache.bifromq.basekv.localengine.KVEngineException; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.proto.Boundary; +import org.slf4j.Logger; + +public class InMemKVSpaceMigratableWriter, T extends InMemKVSpace> + extends InMemKVSpaceWriter implements IKVSpaceMigratableWriter { + + InMemKVSpaceMigratableWriter(String id, + InMemKVSpaceEpoch epoch, + E engine, + ISyncContext syncContext, + Consumer afterWrite, + Consumer impactListener, + KVSpaceOpMeters readOpMeters, + Logger logger) { + super(id, epoch, engine, syncContext, afterWrite, impactListener, readOpMeters, logger); + } + + @Override + 
public IRestoreSession migrateTo(String targetSpaceId, Boundary boundary) { + try { + InMemCPableKVSpace targetKVSpace = (InMemCPableKVSpace) engine.createIfMissing(targetSpaceId); + IRestoreSession session = targetKVSpace.startRestore(((count, bytes) -> + logger.debug("Migrate {} kv to space[{}] from space[{}]: startKey={}, endKey={}", + count, targetSpaceId, id, boundary.getStartKey().toStringUtf8(), + boundary.getEndKey().toStringUtf8()))); + // move data + try (IKVSpaceIterator itr = new InMemKVSpaceIterator(epoch.dataMap(), boundary)) { + for (itr.seekToFirst(); itr.isValid(); itr.next()) { + session.put(itr.key(), itr.value()); + } + } + // clear moved data in left range + helper.clear(id, boundary); + return session; + } catch (Throwable e) { + throw new KVEngineException("Delete range in batch failed", e); + } + } +} diff --git a/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceReader.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceReader.java new file mode 100644 index 000000000..fbddd6e34 --- /dev/null +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceReader.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.memory; + +import com.google.common.collect.Sets; +import com.google.protobuf.ByteString; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Set; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.function.Supplier; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; +import org.apache.bifromq.basekv.localengine.ISyncContext; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.utils.BoundaryUtil; +import org.slf4j.Logger; + +class InMemKVSpaceReader extends AbstractInMemKVSpaceReader implements IKVSpaceRefreshableReader { + private static final int MAX_CACHE_ENTRIES = 1024; + private final ISyncContext.IRefresher refresher; + private final Supplier epochSupplier; + private final InMemKVSpace.TrackedBoundaryIndex tracker; + private final Set openedIterators = Sets.newConcurrentHashSet(); + private final Map sizeCache = new ConcurrentSkipListMap<>(BoundaryUtil::compare); + private volatile InMemKVSpaceEpoch currentEpoch; + + InMemKVSpaceReader(String id, + KVSpaceOpMeters readOpMeters, + Logger logger, + ISyncContext.IRefresher refresher, + Supplier epochSupplier, + InMemKVSpace.TrackedBoundaryIndex tracker) { + super(id, readOpMeters, logger); + this.refresher = refresher; + this.epochSupplier = epochSupplier; + this.tracker = tracker; + this.currentEpoch = 
epochSupplier.get(); + } + + @Override + protected Map metadataMap() { + return currentEpoch.metadataMap(); + } + + @Override + protected NavigableMap rangeData() { + return currentEpoch.dataMap(); + } + + @Override + public void close() { + + } + + @Override + public void refresh() { + refresher.runIfNeeded((genBumped) -> { + currentEpoch = epochSupplier.get(); + sizeCache.clear(); + openedIterators.forEach(itr -> itr.refresh(currentEpoch.dataMap())); + }); + } + + @Override + protected IKVSpaceIterator doNewIterator(Boundary subBoundary) { + InMemKVSpaceIterator itr = new InMemKVSpaceIterator(rangeData(), subBoundary, openedIterators::remove); + openedIterators.add(itr); + return itr; + } + + @Override + protected long doSize(Boundary boundary) { + // Fast path for full-range size + if (!boundary.hasStartKey() && !boundary.hasEndKey()) { + return currentEpoch.totalDataBytes(); + } + // Consult reader-local cache first + Long cached = sizeCache.get(boundary); + if (cached != null) { + return cached; + } + // Delegate to space-level tracker for lazy tracking + long sized = tracker.sizeOrTrack(currentEpoch, boundary); + if (sizeCache.size() >= MAX_CACHE_ENTRIES) { + try { + @SuppressWarnings("unchecked") + ConcurrentSkipListMap sl = (ConcurrentSkipListMap) sizeCache; + if (sl.pollFirstEntry() == null && !sizeCache.isEmpty()) { + // fallback best-effort + sizeCache.remove(sizeCache.keySet().iterator().next()); + } + } catch (ClassCastException ignored) { + // best-effort fallback if type changes in future + if (!sizeCache.isEmpty()) { + sizeCache.remove(sizeCache.keySet().iterator().next()); + } + } + } + sizeCache.put(boundary, sized); + return sized; + } +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceWriter.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceWriter.java similarity index 54% rename from 
base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceWriter.java rename to base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceWriter.java index a22448df2..ac8236a2c 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceWriter.java +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceWriter.java @@ -19,56 +19,61 @@ package org.apache.bifromq.basekv.localengine.memory; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.IKVSpaceMetadataWriter; +import com.google.protobuf.ByteString; +import java.util.function.Consumer; import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; import org.apache.bifromq.basekv.localengine.ISyncContext; import org.apache.bifromq.basekv.localengine.KVEngineException; import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; import org.apache.bifromq.basekv.proto.Boundary; -import com.google.protobuf.ByteString; -import java.util.Map; -import java.util.concurrent.ConcurrentSkipListMap; -import java.util.function.Consumer; import org.slf4j.Logger; -class InMemKVSpaceWriter, T extends InMemKVSpace> extends InMemKVSpaceReader - implements IKVSpaceWriter { - private final Map metadataMap; - private final ConcurrentSkipListMap rangeData; - private final E engine; - private final InMemKVSpaceWriterHelper helper; +class InMemKVSpaceWriter, T extends InMemKVSpace> implements IKVSpaceWriter { + protected final String id; + protected final KVSpaceOpMeters opMeters; + protected final Logger logger; + protected final InMemKVSpaceWriterHelper helper; + protected final E engine; + protected final InMemKVSpaceEpoch epoch; InMemKVSpaceWriter(String id, - Map metadataMap, - ConcurrentSkipListMap rangeData, + InMemKVSpaceEpoch epoch, E engine, ISyncContext 
syncContext, Consumer afterWrite, + Consumer impactListener, KVSpaceOpMeters readOpMeters, Logger logger) { - this(id, metadataMap, rangeData, engine, syncContext, new InMemKVSpaceWriterHelper(), - afterWrite, readOpMeters, logger); + this(id, epoch, engine, syncContext, new InMemKVSpaceWriterHelper(), + afterWrite, impactListener, readOpMeters, logger); } private InMemKVSpaceWriter(String id, - Map metadataMap, - ConcurrentSkipListMap rangeData, + InMemKVSpaceEpoch epoch, E engine, ISyncContext syncContext, InMemKVSpaceWriterHelper writerHelper, Consumer afterWrite, + Consumer impactListener, KVSpaceOpMeters readOpMeters, Logger logger) { - super(id, readOpMeters, logger); - this.metadataMap = metadataMap; - this.rangeData = rangeData; + this.id = id; + this.opMeters = readOpMeters; + this.logger = logger; + this.epoch = epoch; this.engine = engine; this.helper = writerHelper; - writerHelper.addMutators(id, metadataMap, rangeData, syncContext.mutator()); + writerHelper.addMutators(id, epoch, syncContext.mutator()); writerHelper.addAfterWriteCallback(id, afterWrite); + if (impactListener != null) { + writerHelper.addAfterImpactCallback(id, impactListener); + } } + @Override + public String id() { + return id; + } @Override public IKVSpaceWriter metadata(ByteString metaKey, ByteString metaValue) { @@ -106,45 +111,6 @@ public IKVSpaceWriter clear(Boundary boundary) { return this; } - @Override - public IKVSpaceMetadataWriter migrateTo(String targetSpaceId, Boundary boundary) { - try { - InMemKVSpace targetKVSpace = engine.createIfMissing(targetSpaceId); - IKVSpaceWriter targetKVSpaceWriter = targetKVSpace.toWriter(); - // move data - try (IKVSpaceIterator itr = newIterator(boundary)) { - for (itr.seekToFirst(); itr.isValid(); itr.next()) { - targetKVSpaceWriter.put(itr.key(), itr.value()); - } - } - // clear moved data in left range - helper.clear(id, boundary); - return targetKVSpaceWriter; - } catch (Throwable e) { - throw new KVEngineException("Delete range in 
batch failed", e); - } - } - - @Override - public IKVSpaceMetadataWriter migrateFrom(String fromSpaceId, Boundary boundary) { - - try { - InMemKVSpace sourceKVSpace = engine.createIfMissing(fromSpaceId); - IKVSpaceWriter sourceKVSpaceWriter = sourceKVSpace.toWriter(); - // move data - try (IKVSpaceIterator itr = sourceKVSpace.newIterator(boundary)) { - for (itr.seekToFirst(); itr.isValid(); itr.next()) { - helper.put(id, itr.key(), itr.value()); - } - } - // clear moved data in right range - sourceKVSpaceWriter.clear(boundary); - return sourceKVSpaceWriter; - } catch (Throwable e) { - throw new KVEngineException("Delete range in batch failed", e); - } - } - @Override public void done() { opMeters.batchWriteCallTimer.record(() -> { @@ -167,14 +133,4 @@ public void abort() { public int count() { return helper.count(); } - - @Override - protected Map metadataMap() { - return metadataMap; - } - - @Override - protected ConcurrentSkipListMap rangeData() { - return rangeData; - } } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceWriterHelper.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceWriterHelper.java similarity index 57% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceWriterHelper.java rename to base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceWriterHelper.java index 961f05036..b7edd39f9 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceWriterHelper.java +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceWriterHelper.java @@ -19,8 +19,6 @@ package org.apache.bifromq.basekv.localengine.memory; -import org.apache.bifromq.basekv.localengine.ISyncContext; -import 
org.apache.bifromq.basekv.proto.Boundary; import com.google.protobuf.ByteString; import java.util.ArrayList; import java.util.HashMap; @@ -29,30 +27,28 @@ import java.util.Map; import java.util.NavigableSet; import java.util.Set; -import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; +import org.apache.bifromq.basekv.localengine.ISyncContext; +import org.apache.bifromq.basekv.proto.Boundary; class InMemKVSpaceWriterHelper { - private final Map> metadataMap; - private final Map> rangeDataMap; + private final Map kvSpaceEpochMap; private final Map batchMap; private final Map> afterWriteCallbacks = new HashMap<>(); + private final Map> afterImpactCallbacks = new HashMap<>(); private final Map metadataChanges = new HashMap<>(); private final Set mutators = new HashSet<>(); InMemKVSpaceWriterHelper() { - this.metadataMap = new HashMap<>(); - this.rangeDataMap = new HashMap<>(); + this.kvSpaceEpochMap = new HashMap<>(); this.batchMap = new HashMap<>(); } - void addMutators(String rangeId, - Map metadata, - ConcurrentSkipListMap rangeData, + void addMutators(String id, + InMemKVSpaceEpoch epoch, ISyncContext.IMutator mutator) { - metadataMap.put(rangeId, metadata); - rangeDataMap.put(rangeId, rangeData); + kvSpaceEpochMap.put(id, epoch); mutators.add(mutator); } @@ -61,6 +57,9 @@ void addAfterWriteCallback(String rangeId, Consumer afterWrite) { metadataChanges.put(rangeId, false); } + void addAfterImpactCallback(String rangeId, Consumer afterImpact) { + afterImpactCallbacks.put(rangeId, afterImpact); + } void metadata(String rangeId, ByteString metaKey, ByteString metaValue) { batchMap.computeIfAbsent(rangeId, k -> new WriteBatch(rangeId)).metadata(metaKey, metaValue); @@ -84,24 +83,39 @@ void clear(String rangeId, Boundary boundary) { } void done() { - Runnable doneFn = () -> batchMap.values().forEach(WriteBatch::end); - AtomicReference finalRun = new AtomicReference<>(); + 
ISyncContext.IMutation doneFn = () -> { + Map impacts = new HashMap<>(); + batchMap.values().forEach(batch -> { + WriteImpact impact = batch.endAndCollectImpact(); + if (impact != null) { + impacts.put(batch.rangeId, impact); + } + }); + impacts.forEach((id, imp) -> { + Consumer cb = afterImpactCallbacks.get(id); + if (cb != null) { + cb.accept(imp); + } + }); + return false; + }; + AtomicReference finalRun = new AtomicReference<>(); for (ISyncContext.IMutator mutator : mutators) { if (finalRun.get() == null) { finalRun.set(() -> mutator.run(doneFn)); } else { - Runnable innerRun = finalRun.get(); + ISyncContext.IMutation innerRun = finalRun.get(); finalRun.set(() -> mutator.run(innerRun)); } } - finalRun.get().run(); + finalRun.get().mutate(); for (String rangeId : afterWriteCallbacks.keySet()) { afterWriteCallbacks.get(rangeId).accept(metadataChanges.get(rangeId)); } } void abort() { - rangeDataMap.clear(); + batchMap.clear(); } int count() { @@ -111,7 +125,12 @@ int count() { protected interface KVAction { KVAction.Type type(); - enum Type {Put, Delete, DeleteRange} + enum Type { Put, Delete, DeleteRange } + } + + public record WriteImpact(List pointKeys, + List deleteRanges, + Map pointDeltaBytes) { } protected class WriteBatch { @@ -119,8 +138,8 @@ protected class WriteBatch { Map metadata = new HashMap<>(); List actions = new ArrayList<>(); - protected WriteBatch(String rangeId) { - this.rangeId = rangeId; + protected WriteBatch(String spaceId) { + this.rangeId = spaceId; } public void metadata(ByteString key, ByteString value) { @@ -147,43 +166,78 @@ public void deleteRange(Boundary boundary) { actions.add(new WriteBatch.DeleteRange(boundary)); } - public void end() { - metadataMap.get(rangeId).putAll(metadata); + public WriteImpact endAndCollectImpact() { + List putOrDeleteKeys = new ArrayList<>(); + List deleteRanges = new ArrayList<>(); + Map pointDeltaBytes = new HashMap<>(); + InMemKVSpaceEpoch epoch = kvSpaceEpochMap.get(rangeId); + + 
metadata.forEach(epoch::setMetadata); + for (KVAction action : actions) { switch (action.type()) { case Put -> { WriteBatch.Put put = (WriteBatch.Put) action; - rangeDataMap.get(rangeId).put(put.key, put.value); + // compute delta before mutation + ByteString old = epoch.dataMap().get(put.key); + int oldBytes = old == null ? 0 : (put.key.size() + old.size()); + int newBytes = put.key.size() + put.value.size(); + int delta = newBytes - oldBytes; + if (delta != 0) { + pointDeltaBytes.merge(put.key, delta, Integer::sum); + } + epoch.putData(put.key, put.value); + putOrDeleteKeys.add(put.key); } case Delete -> { WriteBatch.Delete delete = (WriteBatch.Delete) action; - rangeDataMap.get(rangeId).remove(delete.key); + // compute delta before mutation + ByteString old = epoch.dataMap().get(delete.key); + if (old != null) { + int delta = -(delete.key.size() + old.size()); + pointDeltaBytes.merge(delete.key, delta, Integer::sum); + epoch.removeData(delete.key); + } + putOrDeleteKeys.add(delete.key); } case DeleteRange -> { WriteBatch.DeleteRange deleteRange = (WriteBatch.DeleteRange) action; Boundary boundary = deleteRange.boundary; NavigableSet inRangeKeys; if (!boundary.hasStartKey() && !boundary.hasEndKey()) { - inRangeKeys = rangeDataMap.get(rangeId).keySet(); + inRangeKeys = epoch.dataMap().navigableKeySet(); } else if (!boundary.hasStartKey()) { - inRangeKeys = rangeDataMap.get(rangeId).headMap(boundary.getEndKey()).keySet(); + inRangeKeys = epoch.dataMap().headMap(boundary.getEndKey(), false).navigableKeySet(); } else if (!boundary.hasEndKey()) { - inRangeKeys = rangeDataMap.get(rangeId).tailMap(boundary.getStartKey()).keySet(); + inRangeKeys = epoch.dataMap().tailMap(boundary.getStartKey(), true).navigableKeySet(); } else { - inRangeKeys = - rangeDataMap.get(rangeId).subMap(boundary.getStartKey(), boundary.getEndKey()).keySet(); + inRangeKeys = epoch.dataMap() + .subMap(boundary.getStartKey(), true, boundary.getEndKey(), false) + .navigableKeySet(); + } + // 
accumulate negative delta per key before removal + for (ByteString k : inRangeKeys) { + ByteString v = epoch.dataMap().get(k); + if (v != null) { + int delta = -(k.size() + v.size()); + pointDeltaBytes.merge(k, delta, Integer::sum); + } } - inRangeKeys.forEach(k -> rangeDataMap.get(rangeId).remove(k)); + inRangeKeys.forEach(epoch::removeData); + deleteRanges.add(boundary); + } + default -> { + // no-op } } } + return new WriteImpact(putOrDeleteKeys, deleteRanges, pointDeltaBytes); } public void abort() { } - record Put(ByteString key, ByteString value) implements KVAction { @Override public Type type() { diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemWALableKVEngine.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemWALableKVEngine.java similarity index 73% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemWALableKVEngine.java rename to base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemWALableKVEngine.java index 838055af0..a35d6d73b 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemWALableKVEngine.java +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemWALableKVEngine.java @@ -19,21 +19,28 @@ package org.apache.bifromq.basekv.localengine.memory; +import com.google.protobuf.Struct; import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; import org.slf4j.Logger; -public class InMemWALableKVEngine extends InMemKVEngine { - public InMemWALableKVEngine(String overrideIdentity, InMemKVEngineConfigurator c) { - super(overrideIdentity, c); +class InMemWALableKVEngine extends InMemKVEngine { + + InMemWALableKVEngine(String overrideIdentity, Struct conf) { + super(overrideIdentity, conf); } @Override protected InMemWALableKVSpace 
doBuildKVSpace(String spaceId, - InMemKVEngineConfigurator configurator, + Struct conf, Runnable onDestroy, KVSpaceOpMeters opMeters, Logger logger, String... tags) { - return new InMemWALableKVSpace(spaceId, configurator, this, onDestroy, opMeters, logger); + return new InMemWALableKVSpace(spaceId, conf, this, onDestroy, opMeters, logger); + } + + @Override + protected Struct defaultConf() { + return InMemDefaultConfigs.WAL; } } diff --git a/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemWALableKVSpace.java b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemWALableKVSpace.java new file mode 100644 index 000000000..3824aa97f --- /dev/null +++ b/base-kv/base-kv-local-engine-memory/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemWALableKVSpace.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.memory; + +import com.google.protobuf.Struct; +import java.util.concurrent.CompletableFuture; +import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; +import org.apache.bifromq.basekv.localengine.IWALableKVSpace; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.slf4j.Logger; + +class InMemWALableKVSpace extends InMemKVSpace + implements IWALableKVSpace { + private final InMemKVSpaceEpoch epoch; + + InMemWALableKVSpace(String id, + Struct conf, + InMemWALableKVEngine engine, + Runnable onDestroy, + KVSpaceOpMeters opMeters, + Logger logger, + String... tags) { + super(id, conf, engine, onDestroy, opMeters, logger, tags); + epoch = new InMemKVSpaceEpoch(); + } + + @Override + public CompletableFuture flush() { + return CompletableFuture.completedFuture(System.nanoTime()); + } + + @Override + public IKVSpaceWriter toWriter() { + return new InMemKVSpaceWriter<>(id, epoch, engine, syncContext, metadataUpdated -> { + if (metadataUpdated) { + this.loadMetadata(); + } + }, impact -> { + tracker.updateOnWrite(impact, epoch.dataMap()); + }, opMeters, logger); + } + + @Override + protected void doClose() { + + } + + @Override + protected InMemKVSpaceEpoch handle() { + return epoch; + } + + @Override + protected void doOpen() { + + } +} diff --git a/base-kv/base-kv-local-engine-memory/src/main/resources/META-INF/services/org.apache.bifromq.basekv.localengine.spi.IKVEngineProvider b/base-kv/base-kv-local-engine-memory/src/main/resources/META-INF/services/org.apache.bifromq.basekv.localengine.spi.IKVEngineProvider new file mode 100644 index 000000000..2cc3573a2 --- /dev/null +++ b/base-kv/base-kv-local-engine-memory/src/main/resources/META-INF/services/org.apache.bifromq.basekv.localengine.spi.IKVEngineProvider @@ -0,0 +1 @@ +org.apache.bifromq.basekv.localengine.memory.InMemKVEngineProvider diff --git 
a/base-kv/base-kv-local-engine-memory/src/test/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVSpaceRestoreFlushListenerTest.java b/base-kv/base-kv-local-engine-memory/src/test/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVSpaceRestoreFlushListenerTest.java new file mode 100644 index 000000000..a7f46bc21 --- /dev/null +++ b/base-kv/base-kv-local-engine-memory/src/test/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVSpaceRestoreFlushListenerTest.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.memory; + +import static org.testng.Assert.assertEquals; + +import com.google.protobuf.ByteString; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVEngine; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class InMemCPableKVSpaceRestoreFlushListenerTest { + private IKVEngine engine; + + @BeforeMethod + public void setup() { + engine = new InMemCPableKVEngine(null, InMemDefaultConfigs.CP); + engine.start(); + } + + @AfterMethod + public void tearDown() { + if (engine != null) { + engine.stop(); + } + } + + @Test + public void testDoneReportsSingleAggregatedFlush() { + String spaceId = "inmem_flush"; + ICPableKVSpace space = engine.createIfMissing(spaceId); + + AtomicInteger callbackCount = new AtomicInteger(); + AtomicLong totalEntries = new AtomicLong(); + AtomicLong totalBytes = new AtomicLong(); + IRestoreSession session = space.startRestore((c, b) -> { + callbackCount.incrementAndGet(); + totalEntries.addAndGet(c); + totalBytes.addAndGet(b); + }); + + int n = 1000; + long expectBytes = 0; + for (int i = 0; i < n; i++) { + ByteString k = ByteString.copyFromUtf8("k" + i); + ByteString v = ByteString.copyFromUtf8("v" + i); + expectBytes += k.size() + v.size(); + session.put(k, v); + } + session.done(); + + assertEquals(callbackCount.get(), 1); + assertEquals(totalEntries.get(), n); + assertEquals(totalBytes.get(), expectBytes); + } +} diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngineTest.java b/base-kv/base-kv-local-engine-memory/src/test/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngineTest.java similarity index 73% rename from 
base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngineTest.java rename to base-kv/base-kv-local-engine-memory/src/test/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngineTest.java index 28cb205bc..9522051e8 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngineTest.java +++ b/base-kv/base-kv-local-engine-memory/src/test/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngineTest.java @@ -22,10 +22,17 @@ import org.apache.bifromq.basekv.localengine.AbstractKVEngineTest; import org.apache.bifromq.basekv.localengine.ICPableKVSpace; import org.apache.bifromq.basekv.localengine.IKVEngine; +import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; -public class InMemKVEngineTest extends AbstractKVEngineTest { +public class InMemKVEngineTest extends AbstractKVEngineTest { @Override protected IKVEngine newEngine() { - return new InMemCPableKVEngine(null, new InMemKVEngineConfigurator()); + return new InMemCPableKVEngine(null, InMemDefaultConfigs.CP); + } + + @Override + protected IKVSpaceWriter writerOf(ICPableKVSpace space) { + // InMem CPable space returns a restorable writer which is also an IKVSpaceWriter + return space.toWriter(); } } diff --git a/base-kv/base-kv-local-engine-memory/src/test/resources/log4j2-test.xml b/base-kv/base-kv-local-engine-memory/src/test/resources/log4j2-test.xml new file mode 100644 index 000000000..1404c8668 --- /dev/null +++ b/base-kv/base-kv-local-engine-memory/src/test/resources/log4j2-test.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/base-kv/base-kv-local-engine-rocksdb/pom.xml b/base-kv/base-kv-local-engine-rocksdb/pom.xml new file mode 100644 index 000000000..81ea28921 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/pom.xml @@ -0,0 +1,85 @@ + + + + 4.0.0 + + org.apache.bifromq + base-kv + ${revision} + + base-kv-local-engine-rocksdb + + + + org.apache.bifromq + 
base-kv-local-engine-spi + + + org.apache.bifromq + base-kv-local-engine-spi + test-jar + ${project.version} + test + + + org.rocksdb + rocksdbjni + + + + org.awaitility + awaitility + test + + + org.mockito + mockito-core + test + + + org.openjdk.jmh + jmh-core + test + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + test + + + org.apache.logging.log4j + log4j-api + test + + + org.apache.logging.log4j + log4j-core + test + + + org.apache.logging.log4j + log4j-slf4j2-impl + test + + + diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRocksDBKVSpaceReader.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRocksDBKVSpaceReader.java new file mode 100644 index 000000000..c510f5f86 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRocksDBKVSpaceReader.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static com.google.protobuf.UnsafeByteOperations.unsafeWrap; +import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.toDataKey; + +import com.google.protobuf.ByteString; +import java.util.Optional; +import org.apache.bifromq.basekv.localengine.AbstractKVSpaceReader; +import org.apache.bifromq.basekv.localengine.KVEngineException; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.proto.Boundary; +import org.rocksdb.ReadOptions; +import org.rocksdb.RocksDBException; +import org.slf4j.Logger; + +abstract class AbstractRocksDBKVSpaceReader extends AbstractKVSpaceReader { + protected AbstractRocksDBKVSpaceReader(String id, + KVSpaceOpMeters opMeters, + Logger logger) { + super(id, opMeters, logger); + } + + protected abstract IRocksDBKVSpaceEpoch handle(); + + protected abstract RocksDBSnapshot snapshot(); + + protected final long doSize(Boundary boundary) { + return RocksDBHelper.sizeOfBoundary(handle(), boundary); + } + + @Override + protected final boolean doExist(ByteString key) { + return get(key).isPresent(); + } + + @Override + protected final Optional doGet(ByteString key) { + try (ReadOptions readOptions = new ReadOptions()) { + readOptions.setSnapshot(snapshot().snapshot()); + IRocksDBKVSpaceEpoch dbHandle = handle(); + byte[] data = dbHandle.db().get(dbHandle.cf(), readOptions, toDataKey(key)); + return Optional.ofNullable(data == null ? 
null : unsafeWrap(data)); + } catch (RocksDBException rocksDBException) { + throw new KVEngineException("Get failed", rocksDBException); + } + } +} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/AdaptiveWriteBudget.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/AdaptiveWriteBudget.java similarity index 95% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/AdaptiveWriteBudget.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/AdaptiveWriteBudget.java index 6c95a30fe..a5a7622eb 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/AdaptiveWriteBudget.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/AdaptiveWriteBudget.java @@ -17,8 +17,10 @@ * under the License. */ -package org.apache.bifromq.basekv.store.range; +package org.apache.bifromq.basekv.localengine.rocksdb; +// Package-visible adaptive write budget copied from base-kv-store-server module +// to avoid cross-package visibility; used by RestoreSession to tune flush cadence. 
final class AdaptiveWriteBudget { private static final long TARGET_LATENCY_MS = 50; private static final double EMA_ALPHA = 0.3; diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/AutoCleaner.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/AutoCleaner.java similarity index 100% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/AutoCleaner.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/AutoCleaner.java diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IRocksDBKVSpaceCheckpoint.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IRocksDBKVSpaceCheckpoint.java similarity index 100% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IRocksDBKVSpaceCheckpoint.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IRocksDBKVSpaceCheckpoint.java diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IRocksDBKVSpaceEpoch.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IRocksDBKVSpaceEpoch.java new file mode 100644 index 000000000..64822a540 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IRocksDBKVSpaceEpoch.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import org.apache.bifromq.basekv.localengine.IKVSpaceEpoch; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.RocksDB; + +/** + * Interface for accessing RocksDB space epoch. + */ +interface IRocksDBKVSpaceEpoch extends IKVSpaceEpoch { + /** + * Get the backing RocksDB instance. + * + * @return the backing RocksDB instance + */ + RocksDB db(); + + /** + * Get the column family handle. + * + * @return the column family handle + */ + ColumnFamilyHandle cf(); +} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVCheckpointIterator.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IRocksDBKVSpaceEpochHandle.java similarity index 77% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVCheckpointIterator.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IRocksDBKVSpaceEpochHandle.java index 42199c551..d78f9bc36 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVCheckpointIterator.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IRocksDBKVSpaceEpochHandle.java @@ -17,11 +17,14 @@ * under the License. 
*/ -package org.apache.bifromq.basekv.store.range; +package org.apache.bifromq.basekv.localengine.rocksdb; -import org.apache.bifromq.basekv.store.api.IKVIterator; - -public interface IKVCheckpointIterator extends IKVIterator, AutoCloseable { - @Override +/** + * Handle to a RocksDB KV space epoch. + */ +interface IRocksDBKVSpaceEpochHandle extends IRocksDBKVSpaceEpoch { + /** + * Close the handle and release any resources. + */ void close(); } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IWriteStatsRecorder.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IWriteStatsRecorder.java similarity index 100% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IWriteStatsRecorder.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IWriteStatsRecorder.java diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVEngineConfigurator.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IteratorOptions.java similarity index 84% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVEngineConfigurator.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IteratorOptions.java index a2ed7ab70..b7e5d6dad 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVEngineConfigurator.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/IteratorOptions.java @@ -14,10 +14,10 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. 
+ * under the License. */ -package org.apache.bifromq.basekv.localengine; +package org.apache.bifromq.basekv.localengine.rocksdb; -public interface IKVEngineConfigurator { +record IteratorOptions(boolean fillCache, long readAheadSize) { } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/Keys.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/Keys.java similarity index 100% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/Keys.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/Keys.java diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/NoopWriteStatsRecorder.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/NoopWriteStatsRecorder.java similarity index 100% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/NoopWriteStatsRecorder.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/NoopWriteStatsRecorder.java diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngine.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngine.java similarity index 71% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngine.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngine.java index 1f0b8e1ea..681203185 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngine.java +++ 
b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngine.java @@ -19,24 +19,28 @@ package org.apache.bifromq.basekv.localengine.rocksdb; -import org.apache.bifromq.basekv.localengine.KVEngineException; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import static org.apache.bifromq.basekv.localengine.StructUtil.strVal; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; + +import com.google.protobuf.Struct; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.Tags; import java.io.File; import java.nio.file.Files; +import java.nio.file.Paths; +import org.apache.bifromq.basekv.localengine.KVEngineException; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; import org.slf4j.Logger; -public class RocksDBCPableKVEngine extends - RocksDBKVEngine { +class RocksDBCPableKVEngine extends RocksDBKVEngine { private final File cpRootDir; private MetricManager metricManager; - public RocksDBCPableKVEngine(String overrideIdentity, - RocksDBCPableKVEngineConfigurator configurator) { - super(overrideIdentity, configurator); - cpRootDir = new File(configurator.dbCheckpointRootDir()); + public RocksDBCPableKVEngine(String overrideIdentity, Struct conf) { + super(overrideIdentity, conf); + cpRootDir = new File(strVal(conf, DB_CHECKPOINT_ROOT_DIR)); try { Files.createDirectories(cpRootDir.getAbsoluteFile().toPath()); } catch (Throwable e) { @@ -46,12 +50,27 @@ public RocksDBCPableKVEngine(String overrideIdentity, @Override protected RocksDBCPableKVSpace doBuildKVSpace(String spaceId, - RocksDBCPableKVEngineConfigurator configurator, + Struct conf, Runnable onDestroy, KVSpaceOpMeters opMeters, Logger logger, String... 
tags) { - return new RocksDBCPableKVSpace(spaceId, configurator, this, onDestroy, opMeters, logger, tags); + return new RocksDBCPableKVSpace(spaceId, conf, this, onDestroy, opMeters, logger, tags); + } + + @Override + protected Struct defaultConf() { + return RocksDBDefaultConfigs.CP; + } + + @Override + protected void validateSemantics(Struct conf) { + try { + Paths.get(strVal(conf, DB_ROOT_DIR)); + Paths.get(strVal(conf, DB_CHECKPOINT_ROOT_DIR)); + } catch (Throwable t) { + throw new IllegalArgumentException("Invalid RocksDB data/checkpoint path in configuration", t); + } } @Override diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpace.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpace.java new file mode 100644 index 000000000..665fe8f52 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpace.java @@ -0,0 +1,556 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static org.apache.bifromq.basekv.localengine.StructUtil.strVal; +import static org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters.getGauge; +import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.LATEST_CP_KEY; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBHelper.deleteDir; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import io.micrometer.core.instrument.Gauge; +import io.micrometer.core.instrument.Tags; +import io.micrometer.core.instrument.Timer; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Collections; +import java.util.Optional; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; +import lombok.SneakyThrows; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceCheckpoint; +import org.apache.bifromq.basekv.localengine.IKVSpaceMigratableWriter; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.apache.bifromq.basekv.localengine.KVEngineException; +import org.apache.bifromq.basekv.localengine.RestoreMode; +import org.apache.bifromq.basekv.localengine.metrics.GeneralKVSpaceMetric; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; 
+import org.apache.bifromq.basekv.localengine.rocksdb.metrics.RocksDBKVSpaceMetric; +import org.rocksdb.FlushOptions; +import org.rocksdb.RocksDBException; +import org.rocksdb.WriteOptions; +import org.slf4j.Logger; + +class RocksDBCPableKVSpace extends RocksDBKVSpace implements ICPableKVSpace { + public static final String ACTIVE_GEN_POINTER = "ACTIVE"; + private static final String CP_SUFFIX = ".cp"; + private final RocksDBCPableKVEngine engine; + private final File cpRootDir; + private final WriteOptions writeOptions; + private final AtomicReference latestCheckpointId = new AtomicReference<>(); + private final Cache checkpoints; + private final MetricManager metricMgr; + private final AtomicReference active = new AtomicReference<>(); + private File currentDBDir; + // keep a strong ref to latest checkpoint + private IKVSpaceCheckpoint latestCheckpoint; + + @SneakyThrows + RocksDBCPableKVSpace(String id, + Struct conf, + RocksDBCPableKVEngine engine, + Runnable onDestroy, + KVSpaceOpMeters opMeters, + Logger logger, + String... tags) { + super(id, conf, engine, onDestroy, opMeters, logger, tags); + this.engine = engine; + cpRootDir = new File(strVal(conf, DB_CHECKPOINT_ROOT_DIR), id); + checkpoints = Caffeine.newBuilder().weakValues().build(); + writeOptions = new WriteOptions().setDisableWAL(true); + Files.createDirectories(cpRootDir.getAbsoluteFile().toPath()); + metricMgr = new MetricManager(tags); + // Ensure space root exists and initialize current generation directory. + // The legacy layout migration is done at constructor time to keep open() path clean. 
+ Files.createDirectories(spaceRootDir().getAbsoluteFile().toPath()); + initOrMigrateCurrentDBDir(); + } + + @Override + protected WriteOptions writeOptions() { + return writeOptions; + } + + @Override + protected RocksDBCPableKVSpaceEpochHandle handle() { + return active.get(); + } + + @Override + public String checkpoint() { + return metricMgr.checkpointTimer.record(() -> { + synchronized (this) { + IRocksDBKVSpaceCheckpoint cp = doCheckpoint(); + checkpoints.put(cp.cpId(), cp); + latestCheckpoint = cp; + return cp.cpId(); + } + }); + } + + @Override + public Optional openCheckpoint(String checkpointId) { + IRocksDBKVSpaceCheckpoint cp = checkpoints.getIfPresent(checkpointId); + return Optional.ofNullable(cp); + } + + @Override + public IRestoreSession startRestore(IRestoreSession.FlushListener flushListener) { + return new RestoreSession(RestoreMode.Replace, flushListener, logger); + } + + @Override + public IRestoreSession startReceiving(IRestoreSession.FlushListener flushListener) { + return new RestoreSession(RestoreMode.Overlay, flushListener, logger); + } + + @Override + public IKVSpaceMigratableWriter toWriter() { + return new RocksDBKVSpaceMigratableWriter(id, active.get(), engine, writeOptions(), syncContext, + writeStats.newRecorder(), this::publishMetadata, opMeters, logger); + } + + @Override + protected void doClose() { + logger.debug("Flush RocksDBCPableKVSpace[{}] before closing", id); + try (FlushOptions flushOptions = new FlushOptions().setWaitForFlush(true)) { + active.get().db.flush(flushOptions); + } catch (Throwable e) { + logger.error("Flush RocksDBCPableKVSpace[{}] error", id, e); + } + metricMgr.close(); + checkpoints.asMap().forEach((cpId, cp) -> cp.close()); + writeOptions.close(); + RocksDBCPableKVSpaceEpochHandle h = active.get(); + if (h != null) { + h.close(); + } + super.doClose(); + } + + @Override + protected void doDestroy() { + try { + deleteDir(cpRootDir.toPath()); + } catch (IOException e) { + logger.error("Failed to delete 
checkpoint root dir: {}", cpRootDir, e); + } finally { + super.doDestroy(); + } + } + + @Override + protected void doOpen() { + try { + // Use currentDBDir initialized during construction + this.active.set(newEpochHandle(currentDBDir)); + // cleanup inactive generations at startup + cleanInactiveOnStartup(); + loadLatestCheckpoint(); + super.doOpen(); + } catch (Throwable e) { + throw new KVEngineException("Failed to open CPable KVSpace", e); + } + } + + private RocksDBCPableKVSpaceEpochHandle newEpochHandle(File dir) { + return new RocksDBCPableKVSpaceEpochHandle(id, dir, this.conf, this::isRetired, logger, tags); + } + + private IRocksDBKVSpaceCheckpoint doCheckpoint() { + String cpId = genCheckpointId(); + File cpDir = Paths.get(cpRootDir.getAbsolutePath(), cpId).toFile(); + try { + logger.debug("KVSpace[{}] checkpoint start: checkpointId={}", id, cpId); + RocksDBCPableKVSpaceEpochHandle currentHandle = active.get(); + currentHandle.db.put(currentHandle.cf, LATEST_CP_KEY, cpId.getBytes()); + currentHandle.checkpoint.createCheckpoint(cpDir.toString()); + latestCheckpointId.set(cpId); + return new RocksDBKVSpaceCheckpoint(id, cpId, cpDir, this::isLatest, opMeters, logger); + } catch (Throwable e) { + throw new KVEngineException("Checkpoint key range error", e); + } + } + + @SneakyThrows + private IRocksDBKVSpaceCheckpoint doLoadLatestCheckpoint() { + RocksDBCPableKVSpaceEpochHandle currentHandle = active.get(); + byte[] cpIdBytes = currentHandle.db.get(currentHandle.cf, LATEST_CP_KEY); + if (cpIdBytes != null) { + try { + String cpId = new String(cpIdBytes, UTF_8); + File cpDir = Paths.get(cpRootDir.getAbsolutePath(), cpId).toFile(); + // cleanup obsolete checkpoints + for (String obsoleteId : obsoleteCheckpoints(cpId)) { + try { + cleanCheckpoint(obsoleteId); + } catch (Throwable e) { + logger.error("Clean checkpoint[{}] for kvspace[{}] error", obsoleteId, id, e); + } + } + logger.debug("Load latest checkpoint[{}] of kvspace[{}] in engine[{}] at path[{}]", + 
cpId, id, engine.id(), cpDir); + latestCheckpointId.set(cpId); + return new RocksDBKVSpaceCheckpoint(id, cpId, cpDir, this::isLatest, opMeters, logger); + } catch (Throwable e) { + logger.warn("Failed to load latest checkpoint, checkpoint now", e); + } + } + return doCheckpoint(); + } + + @SneakyThrows + private void loadLatestCheckpoint() { + IRocksDBKVSpaceCheckpoint checkpoint = doLoadLatestCheckpoint(); + assert !checkpoints.asMap().containsKey(checkpoint.cpId()); + checkpoints.put(checkpoint.cpId(), checkpoint); + latestCheckpoint = checkpoint; + } + + private String genCheckpointId() { + // we need generate global unique checkpoint id, since it will be used in raft snapshot + return UUID.randomUUID() + CP_SUFFIX; + } + + private boolean isLatest(String cpId) { + return cpId.equals(latestCheckpointId.get()); + } + + private File checkpointDir(String cpId) { + return Paths.get(cpRootDir.getAbsolutePath(), cpId).toFile(); + } + + private boolean isRetired(String genId) { + // A generation is retired if it is not the one pointed by ACTIVE_GEN_POINTER + File pointer = new File(spaceRootDir(), ACTIVE_GEN_POINTER); + try { + if (!pointer.exists()) { + // No pointer means no active generation defined; treat all as retired. 
+ return true; + } + String activeUuid = Files.readString(pointer.toPath()).trim(); + return !genId.equals(activeUuid); + } catch (Throwable ignore) { + // On any error, be conservative and treat as retired to avoid leak + return true; + } + } + + private Iterable obsoleteCheckpoints(String skipId) { + File[] cpDirList = cpRootDir.listFiles(); + if (cpDirList == null) { + return Collections.emptyList(); + } + return Arrays.stream(cpDirList) + .filter(File::isDirectory) + .map(File::getName) + .filter(cpId -> !skipId.equals(cpId)) + .collect(Collectors.toList()); + } + + private void cleanCheckpoint(String cpId) { + logger.debug("Delete checkpoint[{}] of kvspace[{}]", cpId, id); + try { + deleteDir(checkpointDir(cpId).toPath()); + } catch (IOException e) { + logger.error("Failed to clean checkpoint[{}] for kvspace[{}] at path:{}", cpId, id, checkpointDir(cpId)); + } + } + + private void switchTo(RocksDBCPableKVSpaceEpochHandle handle) { + syncContext.mutator().run(() -> { + // inactive handle will be closed and cleaned up by cleaner + active.set(handle); + updateCurrentDBDir(handle.dir); + reloadMetadata(); + return true; + }); + } + + @SneakyThrows + private void initOrMigrateCurrentDBDir() { + File spaceRoot = spaceRootDir(); + File pointer = new File(spaceRoot, ACTIVE_GEN_POINTER); + // If pointer exists, honor it; otherwise detect legacy layout and migrate. 
+ if (pointer.exists()) { + try { + String uuid = Files.readString(pointer.toPath()).trim(); + if (!uuid.isEmpty()) { + File dir = new File(spaceRoot, uuid); + if (dir.exists() && dir.isDirectory()) { + currentDBDir = dir; + return; + } + } + } catch (Throwable t) { + // fall through to create a new generation + logger.warn("Failed to read {} for {}, create new generation", ACTIVE_GEN_POINTER, id, t); + } + // pointer invalid or target missing -> create a new generation + File newGen = new File(spaceRoot, UUID.randomUUID().toString()); + Files.createDirectories(newGen.toPath()); + updateCurrentDBDir(newGen); + return; + } + + // No pointer file, check if space root is empty: empty means fresh deploy; non-empty means legacy layout. + String[] children = spaceRoot.list(); + if (children == null || children.length == 0) { + // Fresh deployment, bootstrap new generation + File newGen = new File(spaceRoot, UUID.randomUUID().toString()); + Files.createDirectories(newGen.toPath()); + updateCurrentDBDir(newGen); + return; + } + + // Legacy layout: move all existing contents under a new generation directory, then write pointer file. 
+ File newGen = new File(spaceRoot, UUID.randomUUID().toString()); + Files.createDirectories(newGen.toPath()); + File[] entries = spaceRoot.listFiles(); + if (entries != null) { + for (File entry : entries) { + if (entry.getName().equals(ACTIVE_GEN_POINTER)) { + continue; + } + try { + Path target = new File(newGen, entry.getName()).toPath(); + Files.move(entry.toPath(), target); + } catch (Throwable moveEx) { + logger.warn("Failed to move legacy entry {} for space[{}] by Files.move, fallback to rename", + entry.getAbsolutePath(), id, moveEx); + // Fallback to rename if Files.move fails for any reason + boolean renamed = entry.renameTo(new File(newGen, entry.getName())); + if (!renamed) { + logger.warn("Failed to move legacy entry {} for space[{}]", entry.getAbsolutePath(), id); + } + } + } + } + updateCurrentDBDir(newGen); + } + + private void updateCurrentDBDir(File newDir) { + this.currentDBDir = newDir; + File pointer = new File(spaceRootDir(), ACTIVE_GEN_POINTER); + try { + Files.writeString(pointer.toPath(), newDir.getName()); + } catch (Throwable t) { + logger.warn("Failed to update {} pointer for {}", ACTIVE_GEN_POINTER, id, t); + } + } + + @SneakyThrows + private void cleanInactiveOnStartup() { + File pointer = new File(spaceRootDir(), ACTIVE_GEN_POINTER); + if (!pointer.exists()) { + return; + } + String activeUuid; + try { + activeUuid = Files.readString(pointer.toPath()).trim(); + } catch (Throwable t) { + return; + } + if (activeUuid.isEmpty()) { + return; + } + File root = spaceRootDir(); + File[] children = root.listFiles(File::isDirectory); + if (children != null) { + for (File c : children) { + if (!c.getName().equals(activeUuid)) { + deleteDir(c.toPath()); + } + } + } + File[] files = root.listFiles(File::isFile); + if (files != null) { + for (File f : files) { + if (!f.getName().equals(ACTIVE_GEN_POINTER)) { + Files.deleteIfExists(f.toPath()); + } + } + } + } + + @Override + public IKVSpaceRefreshableReader reader() { + return new 
RocksDBKVSpaceReader(id, opMeters, logger, syncContext.refresher(), this::handle, + this::currentMetadata, new IteratorOptions(true, 0)); + } + + private class RestoreSession implements IRestoreSession { + private final File stagingDir; + private final RocksDBKVSpaceWriterHelper helper; + private final AdaptiveWriteBudget adaptiveWriteBudget = new AdaptiveWriteBudget(); + private final IRestoreSession.FlushListener flushListener; + private final Logger logger; + private final AtomicBoolean closed = new AtomicBoolean(false); + private final RocksDBCPableKVSpaceEpochHandle stagingHandle; + private int ops = 0; + private long bytes = 0; + private long batchStartNanos = -1; + + RestoreSession(RestoreMode mode, FlushListener flushListener, Logger logger) { + this.flushListener = flushListener; + this.logger = logger; + try { + stagingDir = Paths.get(spaceRootDir().getAbsolutePath(), UUID.randomUUID().toString()).toFile(); + if (mode == RestoreMode.Overlay) { + active.get().checkpoint.createCheckpoint(stagingDir.toString()); + } else { + Files.createDirectories(stagingDir.toPath()); + } + stagingHandle = new RocksDBCPableKVSpaceEpochHandle(id, stagingDir, RocksDBCPableKVSpace.this.conf, + RocksDBCPableKVSpace.this::isRetired, logger, tags); + helper = new RocksDBKVSpaceWriterHelper(stagingHandle.db, writeOptions); + } catch (Throwable t) { + throw new KVEngineException("Begin restore failed", t); + } + } + + private void ensureOpen() { + if (closed.get()) { + throw new IllegalStateException("Restore session already closed"); + } + } + + private void flushIfNeeded() { + if (adaptiveWriteBudget.shouldFlush(ops, bytes)) { + long start = batchStartNanos > 0 ? 
batchStartNanos : System.nanoTime(); + helper.flush(); + long latencyMillis = Math.max(1L, TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start)); + adaptiveWriteBudget.recordFlush(ops, bytes, latencyMillis); + flushListener.onFlush(ops, bytes); + ops = 0; + bytes = 0; + batchStartNanos = -1; + } + } + + @Override + public IRestoreSession put(ByteString key, ByteString value) { + ensureOpen(); + try { + helper.put(stagingHandle.cf, key, value); + if (ops == 0 && bytes == 0) { + batchStartNanos = System.nanoTime(); + } + ops++; + bytes += key.size() + value.size(); + flushIfNeeded(); + return this; + } catch (RocksDBException e) { + throw new KVEngineException("Restore put failed", e); + } + } + + @Override + public IRestoreSession metadata(ByteString metaKey, ByteString metaValue) { + ensureOpen(); + try { + helper.metadata(stagingHandle.cf, metaKey, metaValue); + return this; + } catch (RocksDBException e) { + throw new KVEngineException("Restore metadata failed", e); + } + } + + @Override + public void done() { + if (closed.compareAndSet(false, true)) { + try { + // Flush remaining data in current batch if any + if (ops > 0 || bytes > 0) { + long start = batchStartNanos > 0 ? 
batchStartNanos : System.nanoTime(); + helper.flush(); + long latencyMillis = Math.max(1L, TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start)); + adaptiveWriteBudget.recordFlush(ops, bytes, latencyMillis); + ops = 0; + bytes = 0; + batchStartNanos = -1; + } + helper.done(); + // switch active to staging + switchTo(stagingHandle); + } catch (Throwable t) { + throw new KVEngineException("Restore done failed", t); + } + } + } + + @Override + public void abort() { + if (closed.compareAndSet(false, true)) { + try { + helper.abort(); + } catch (Throwable t) { + logger.warn("Abort restore session failed", t); + } + try { + stagingHandle.close(); + } catch (Throwable t) { + logger.warn("Close staging RocksDB failed", t); + } + try { + deleteDir(stagingDir.toPath()); + } catch (Throwable t) { + logger.warn("Delete staging dir failed: {}", stagingDir, t); + } + } + } + + @Override + public int count() { + return helper.count(); + } + } + + private class MetricManager { + private final Gauge checkpointGauge; // hold a strong reference + private final Timer checkpointTimer; + + MetricManager(String... 
metricTags) { + Tags tags = Tags.of(metricTags); + checkpointGauge = getGauge(id, GeneralKVSpaceMetric.CheckpointNumGauge, checkpoints::estimatedSize, tags); + checkpointTimer = KVSpaceMeters.getTimer(id, RocksDBKVSpaceMetric.CheckpointTimer, tags); + } + + void close() { + checkpointGauge.close(); + checkpointTimer.close(); + } + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceEpochHandle.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceEpochHandle.java new file mode 100644 index 000000000..faa5436fd --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceEpochHandle.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.IKVEngine.DEFAULT_NS; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBOptionsUtil.buildCPableCFDesc; + +import com.google.protobuf.Struct; +import io.micrometer.core.instrument.Tags; +import java.io.File; +import java.lang.ref.Cleaner; +import java.util.function.Predicate; +import org.apache.bifromq.baseenv.EnvProvider; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.DBOptions; +import org.slf4j.Logger; + +class RocksDBCPableKVSpaceEpochHandle extends RocksDBKVSpaceEpochHandle { + private static final Cleaner CLEANER = Cleaner.create( + EnvProvider.INSTANCE.newThreadFactory("kvspace-epoch-cleaner", true)); + private final SpaceMetrics metrics; + private final Cleaner.Cleanable cleanable; + + RocksDBCPableKVSpaceEpochHandle(String id, + File dir, + Struct conf, + Predicate isRetired, + Logger logger, + Tags tags) { + super(dir, conf, logger); + this.metrics = new SpaceMetrics(id, db, dbOptions, cf, cfDesc.getOptions(), + tags.and("gen", dir.getName()), logger); + cleanable = CLEANER.register(this, new ClosableResources(id, dir.getName(), dbOptions, cfDesc, cf, db, + checkpoint, dir, isRetired, metrics, logger)); + } + + @Override + public void close() { + cleanable.clean(); + } + + @Override + protected DBOptions buildDBOptions(Struct conf) { + return RocksDBOptionsUtil.buildCPableDBOption(conf); + } + + @Override + protected ColumnFamilyDescriptor buildCFDescriptor(Struct conf) { + return buildCPableCFDesc(DEFAULT_NS, conf); + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBDefaultConfigs.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBDefaultConfigs.java new file mode 100644 index 000000000..4f3356f2f --- /dev/null +++ 
b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBDefaultConfigs.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.StructUtil.toValue; + +import com.google.protobuf.Struct; +import org.apache.bifromq.baseenv.EnvProvider; +import org.rocksdb.StatsLevel; +import org.rocksdb.util.SizeUnit; + +/** + * Default configuration constants for RocksDB engines. 
+ */ +public final class RocksDBDefaultConfigs { + public static final String DB_ROOT_DIR = "dbRootDir"; + public static final String DB_CHECKPOINT_ROOT_DIR = "dbCheckpointRootDir"; + public static final String ENABLE_STATS = "enableStats"; + public static final String STATS_LEVEL = "statsLevel"; + public static final String MANUAL_COMPACTION = "manualCompaction"; + public static final String COMPACT_MIN_TOMBSTONE_KEYS = "compactMinTombstoneKeys"; + public static final String COMPACT_MIN_TOMBSTONE_RANGES = "compactMinTombstoneRanges"; + public static final String COMPACT_TOMBSTONE_RATIO = "compactTombstoneRatio"; + public static final String BLOCK_CACHE_SIZE = "blockCacheSize"; + public static final String WRITE_BUFFER_SIZE = "writeBufferSize"; + public static final String MAX_WRITE_BUFFER_NUMBER = "maxWriteBufferNumber"; + public static final String MIN_WRITE_BUFFER_NUMBER_TO_MERGE = "minWriteBufferNumberToMerge"; + public static final String MIN_BLOB_SIZE = "minBlobSize"; + public static final String INCREASE_PARALLELISM = "increaseParallelism"; + public static final String MAX_BACKGROUND_JOBS = "maxBackgroundJobs"; + public static final String LEVEL0_FILE_NUM_COMPACTION_TRIGGER = "level0FileNumCompactionTrigger"; + public static final String LEVEL0_SLOWDOWN_WRITES_TRIGGER = "level0SlowdownWritesTrigger"; + public static final String LEVEL0_STOP_WRITES_TRIGGER = "level0StopWritesTrigger"; + public static final String MAX_BYTES_FOR_LEVEL_BASE = "maxBytesForLevelBase"; + public static final String TARGET_FILE_SIZE_BASE = "targetFileSizeBase"; + public static final String ASYNC_WAL_FLUSH = "asyncWALFlush"; + public static final String FSYNC_WAL = "fsyncWAL"; + public static final Struct CP; + public static final Struct WAL; + + static { + // Build CP with all defaults + Struct.Builder configBuilder = Struct.newBuilder(); + configBuilder.putFields(DB_ROOT_DIR, toValue("")); + configBuilder.putFields(ENABLE_STATS, toValue(false)); + 
configBuilder.putFields(STATS_LEVEL, toValue(StatsLevel.EXCEPT_DETAILED_TIMERS.name())); + configBuilder.putFields(MANUAL_COMPACTION, toValue(true)); + configBuilder.putFields(COMPACT_MIN_TOMBSTONE_KEYS, toValue(200000)); + configBuilder.putFields(COMPACT_MIN_TOMBSTONE_RANGES, toValue(100000)); + configBuilder.putFields(COMPACT_TOMBSTONE_RATIO, toValue(0.3)); + configBuilder.putFields(BLOCK_CACHE_SIZE, toValue(32 * SizeUnit.MB)); + configBuilder.putFields(WRITE_BUFFER_SIZE, toValue(128 * SizeUnit.MB)); + configBuilder.putFields(MAX_WRITE_BUFFER_NUMBER, toValue(6)); + configBuilder.putFields(MIN_WRITE_BUFFER_NUMBER_TO_MERGE, toValue(2)); + configBuilder.putFields(MIN_BLOB_SIZE, toValue(2 * SizeUnit.KB)); + configBuilder.putFields(INCREASE_PARALLELISM, + toValue(Math.max(EnvProvider.INSTANCE.availableProcessors() / 4, 2))); + configBuilder.putFields(MAX_BACKGROUND_JOBS, + toValue(Math.max(EnvProvider.INSTANCE.availableProcessors() / 4, 2))); + configBuilder.putFields(LEVEL0_FILE_NUM_COMPACTION_TRIGGER, toValue(8)); + configBuilder.putFields(LEVEL0_SLOWDOWN_WRITES_TRIGGER, toValue(20)); + configBuilder.putFields(LEVEL0_STOP_WRITES_TRIGGER, toValue(24)); + configBuilder.putFields(MAX_BYTES_FOR_LEVEL_BASE, toValue(128 * 2 * 8 * SizeUnit.MB)); + configBuilder.putFields(TARGET_FILE_SIZE_BASE, toValue(128 * 2 * SizeUnit.MB)); + Struct sharedConfig = configBuilder.build(); + CP = sharedConfig.toBuilder() + .putFields(DB_CHECKPOINT_ROOT_DIR, toValue("")) + .build(); + + // Build WAL from the shared defaults (adds WAL-specific flush flags; it does not extend CP) + WAL = sharedConfig.toBuilder() + .putFields(ASYNC_WAL_FLUSH, toValue(true)) + .putFields(FSYNC_WAL, toValue(false)) + .build(); + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBHelper.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBHelper.java new file mode 100644 index 000000000..3d2b012cc --- /dev/null +++ 
b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBHelper.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static com.google.protobuf.UnsafeByteOperations.unsafeWrap; +import static java.util.Collections.singletonList; +import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.DATA_SECTION_END; +import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.DATA_SECTION_START; +import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.META_SECTION_END; +import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.META_SECTION_START; +import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.fromMetaKey; +import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.toDataKey; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.compare; +import static org.rocksdb.SizeApproximationFlag.INCLUDE_FILES; +import static org.rocksdb.SizeApproximationFlag.INCLUDE_MEMTABLES; + +import com.google.protobuf.ByteString; +import java.io.File; +import java.io.IOException; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; 
+import java.nio.file.Path; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.bifromq.basekv.localengine.KVEngineException; +import org.apache.bifromq.basekv.proto.Boundary; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.DBOptions; +import org.rocksdb.Range; +import org.rocksdb.RocksDB; +import org.rocksdb.Slice; + +class RocksDBHelper { + static RocksDBHandle openDBInDir(File dir, DBOptions dbOptions, ColumnFamilyDescriptor cfDesc) { + try { + List cfHandles = new ArrayList<>(); + RocksDB db = RocksDB.open(dbOptions, dir.getAbsolutePath(), Collections.singletonList(cfDesc), cfHandles); + assert cfHandles.size() == 1; + ColumnFamilyHandle cf = cfHandles.get(0); + return new RocksDBHandle(db, cf); + } catch (Throwable e) { + throw new KVEngineException("Open RocksDB at dir failed", e); + } + } + + static void deleteDir(Path path) throws IOException { + if (!path.toFile().exists()) { + return; + } + Files.walkFileTree(path, new SimpleFileVisitor<>() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) { + try { + Files.delete(file); + } catch (IOException e) { + // do nothing + } + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) { + try { + Files.delete(dir); + } catch (IOException e) { + // do nothing + } + return FileVisitResult.CONTINUE; + } + }); + } + + static long sizeOfBoundary(IRocksDBKVSpaceEpoch dbHandle, Boundary boundary) { + byte[] start = + !boundary.hasStartKey() ? DATA_SECTION_START : toDataKey(boundary.getStartKey().toByteArray()); + byte[] end = + !boundary.hasEndKey() ? 
DATA_SECTION_END : toDataKey(boundary.getEndKey().toByteArray()); + if (compare(start, end) < 0) { + try (Slice startSlice = new Slice(start); Slice endSlice = new Slice(end)) { + Range range = new Range(startSlice, endSlice); + return dbHandle.db() + .getApproximateSizes(dbHandle.cf(), singletonList(range), INCLUDE_MEMTABLES, INCLUDE_FILES)[0]; + } + } + return 0; + } + + static Map getMetadata(IRocksDBKVSpaceEpoch dbHandle) { + try (RocksDBKVEngineIterator metaItr = new RocksDBKVEngineIterator(dbHandle.db(), dbHandle.cf(), + null, + META_SECTION_START, + META_SECTION_END)) { + Map metaMap = new HashMap<>(); + for (metaItr.seekToFirst(); metaItr.isValid(); metaItr.next()) { + metaMap.put(fromMetaKey(metaItr.key()), unsafeWrap(metaItr.value())); + } + return metaMap; + } + } + + record RocksDBHandle(RocksDB db, ColumnFamilyHandle cf) { + } +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngine.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngine.java similarity index 87% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngine.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngine.java index f308963ae..0cd1190aa 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngine.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngine.java @@ -19,9 +19,11 @@ package org.apache.bifromq.basekv.localengine.rocksdb; -import org.apache.bifromq.basekv.localengine.AbstractKVEngine; -import org.apache.bifromq.basekv.localengine.KVEngineException; +import static org.apache.bifromq.basekv.localengine.StructUtil.strVal; +import static 
org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; + import com.google.common.base.Strings; +import com.google.protobuf.Struct; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.Tags; @@ -33,20 +35,26 @@ import java.nio.file.StandardOpenOption; import java.util.UUID; import java.util.stream.Stream; +import org.apache.bifromq.basekv.localengine.AbstractKVEngine; +import org.apache.bifromq.basekv.localengine.KVEngineException; +import org.rocksdb.RocksDB; + +abstract class RocksDBKVEngine extends AbstractKVEngine { + + public static final String IDENTITY_FILE = "IDENTITY"; + + static { + RocksDB.loadLibrary(); + } -abstract class RocksDBKVEngine< - E extends RocksDBKVEngine, - T extends RocksDBKVSpace, - C extends RocksDBKVEngineConfigurator - > extends AbstractKVEngine { private final File dbRootDir; private final String identity; private final boolean isCreate; private MetricManager metricManager; - public RocksDBKVEngine(String overrideIdentity, C configurator) { - super(overrideIdentity, configurator); - dbRootDir = new File(configurator.dbRootDir()); + public RocksDBKVEngine(String overrideIdentity, Struct confRoot) { + super(overrideIdentity, confRoot); + dbRootDir = new File(strVal(engineConf, DB_ROOT_DIR)); try { Files.createDirectories(dbRootDir.getAbsoluteFile().toPath()); isCreate = isEmpty(dbRootDir.toPath()); @@ -103,7 +111,7 @@ private void loadExisting(String... metricTags) { private String loadIdentity(boolean create) { try { - Path identityFilePath = Paths.get(dbRootDir.getAbsolutePath(), "IDENTITY"); + Path identityFilePath = Paths.get(dbRootDir.getAbsolutePath(), IDENTITY_FILE); if (create) { String identity = Strings.isNullOrEmpty(overrideIdentity) ? 
UUID.randomUUID().toString() : overrideIdentity.trim(); diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngineIterator.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngineIterator.java similarity index 86% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngineIterator.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngineIterator.java index cb4eb1c08..be15b6afb 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngineIterator.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngineIterator.java @@ -19,7 +19,6 @@ package org.apache.bifromq.basekv.localengine.rocksdb; -import java.lang.ref.Cleaner; import org.apache.bifromq.basekv.localengine.KVEngineException; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.ReadOptions; @@ -29,14 +28,10 @@ import org.rocksdb.Snapshot; class RocksDBKVEngineIterator implements AutoCloseable { - private static final Cleaner CLEANER = Cleaner.create(); private final RocksIterator rocksIterator; - private final Cleaner.Cleanable onClose; + private final Runnable onClose; - RocksDBKVEngineIterator(RocksDB db, - ColumnFamilyHandle cfHandle, - Snapshot snapshot, - byte[] startKey, + RocksDBKVEngineIterator(RocksDB db, ColumnFamilyHandle cfHandle, Snapshot snapshot, byte[] startKey, byte[] endKey) { this(db, cfHandle, snapshot, startKey, endKey, true); } @@ -62,8 +57,7 @@ class RocksDBKVEngineIterator implements AutoCloseable { readOptions.setSnapshot(snapshot); } rocksIterator = db.newIterator(cfHandle, readOptions); - onClose = CLEANER.register(this, new NativeState(rocksIterator, readOptions, lowerSlice, upperSlice)); - + onClose = new 
NativeState(rocksIterator, readOptions, lowerSlice, upperSlice); } public byte[] key() { @@ -102,9 +96,9 @@ public void seekForPrev(byte[] target) { rocksIterator.seekForPrev(target); } - public void refresh() { + public void refresh(Snapshot snapshot) { try { - rocksIterator.refresh(); + rocksIterator.refresh(snapshot); } catch (Throwable e) { throw new KVEngineException("Unable to refresh iterator", e); } @@ -112,7 +106,7 @@ public void refresh() { @Override public void close() { - onClose.clean(); + onClose.run(); } private record NativeState(RocksIterator itr, ReadOptions readOptions, Slice lowerSlice, Slice upperSlice) diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngineProvider.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngineProvider.java new file mode 100644 index 000000000..3722d6a5a --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngineProvider.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import com.google.protobuf.Struct; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVEngine; +import org.apache.bifromq.basekv.localengine.IWALableKVSpace; +import org.apache.bifromq.basekv.localengine.spi.IKVEngineProvider; + +/** + * Provider for RocksDB engine implementation. + */ +public class RocksDBKVEngineProvider implements IKVEngineProvider { + @Override + public Struct defaultsForCPable() { + return RocksDBDefaultConfigs.CP; + } + + @Override + public Struct defaultsForWALable() { + return RocksDBDefaultConfigs.WAL; + } + + @Override + public String type() { + return "rocksdb"; + } + + @Override + public IKVEngine createCPable(String overrideIdentity, Struct conf) { + return new RocksDBCPableKVEngine(overrideIdentity, conf); + } + + @Override + public IKVEngine createWALable(String overrideIdentity, Struct conf) { + return new RocksDBWALableKVEngine(overrideIdentity, conf); + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpace.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpace.java new file mode 100644 index 000000000..adae463e9 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpace.java @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.StructUtil.boolVal; +import static org.apache.bifromq.basekv.localengine.StructUtil.numVal; +import static org.apache.bifromq.basekv.localengine.StructUtil.strVal; +import static org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters.getCounter; +import static org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters.getTimer; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.COMPACT_MIN_TOMBSTONE_KEYS; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.COMPACT_MIN_TOMBSTONE_RANGES; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.COMPACT_TOMBSTONE_RATIO; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.MANUAL_COMPACTION; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBHelper.deleteDir; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBHelper.getMetadata; + +import com.google.common.collect.Maps; +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import io.micrometer.core.instrument.Counter; +import io.micrometer.core.instrument.Metrics; +import io.micrometer.core.instrument.Tags; +import io.micrometer.core.instrument.Timer; +import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics; +import java.io.File; +import 
java.io.IOException; +import java.util.Collections; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import lombok.SneakyThrows; +import org.apache.bifromq.baseenv.EnvProvider; +import org.apache.bifromq.basekv.localengine.AbstractKVSpace; +import org.apache.bifromq.basekv.localengine.ISyncContext; +import org.apache.bifromq.basekv.localengine.SyncContext; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.localengine.rocksdb.metrics.RocksDBKVSpaceMetric; +import org.apache.bifromq.basekv.proto.Boundary; +import org.rocksdb.CompactRangeOptions; +import org.rocksdb.WriteOptions; +import org.slf4j.Logger; + +abstract class RocksDBKVSpace extends AbstractKVSpace { + + protected final Struct conf; + protected final RocksDBKVEngine engine; + protected final ISyncContext syncContext; + protected final IWriteStatsRecorder writeStats; + private final File keySpaceDBDir; + private final ExecutorService compactionExecutor; + private final AtomicBoolean compacting; + private final ISyncContext.IRefresher metadataRefresher; + private SpaceMetrics spaceMetrics; + private volatile long lastCompactAt; + private volatile long nextCompactAt; + + @SneakyThrows + public RocksDBKVSpace(String id, + Struct conf, + RocksDBKVEngine engine, + Runnable onDestroy, + KVSpaceOpMeters opMeters, + Logger logger, + String... 
tags) { + super(id, onDestroy, opMeters, logger, tags); + this.conf = conf; + this.engine = engine; + syncContext = new SyncContext(); + metadataRefresher = syncContext.refresher(); + compacting = new AtomicBoolean(false); + compactionExecutor = ExecutorServiceMetrics.monitor(Metrics.globalRegistry, new ThreadPoolExecutor(1, 1, + 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<>(), + EnvProvider.INSTANCE.newThreadFactory("kvspace-compactor-" + id)), + "compactor", "kvspace", Tags.of(tags)); + if (boolVal(conf, MANUAL_COMPACTION)) { + int minKeys = (int) numVal(conf, COMPACT_MIN_TOMBSTONE_KEYS); + int minRanges = (int) numVal(conf, COMPACT_MIN_TOMBSTONE_RANGES); + double ratio = numVal(conf, COMPACT_TOMBSTONE_RATIO); + this.writeStats = new RocksDBKVSpaceCompactionTrigger(id, minKeys, minRanges, ratio, this::scheduleCompact, + tags); + } else { + this.writeStats = NoopWriteStatsRecorder.INSTANCE; + } + keySpaceDBDir = new File(strVal(conf, DB_ROOT_DIR), id); + } + + @Override + protected void doOpen() { + spaceMetrics = new SpaceMetrics(tags); + reloadMetadata(); + } + + // Load metadata from DB and publish, without refresher gating + protected void reloadMetadata() { + updateMetadata(getMetadata(handle())); + } + + protected void publishMetadata(Map metadataUpdates) { + if (metadataUpdates.isEmpty()) { + return; + } + metadataRefresher.runIfNeeded((genBumped) -> { + Map metaMap = Maps.newHashMap(currentMetadata()); + metaMap.putAll(metadataUpdates); + updateMetadata(Collections.unmodifiableMap(metaMap)); + }); + } + + @Override + protected void doClose() { + logger.debug("Close key range[{}]", id); + if (spaceMetrics != null) { + spaceMetrics.close(); + } + } + + @Override + protected void doDestroy() { + // Destroy the whole space root directory, including pointer file and all generations. 
+ try { + if (keySpaceDBDir.exists()) { + deleteDir(keySpaceDBDir.toPath()); + } + } catch (IOException e) { + logger.error("Failed to delete space root dir: {}", keySpaceDBDir, e); + } + } + + protected File spaceRootDir() { + return keySpaceDBDir; + } + + protected abstract RocksDBKVSpaceEpochHandle handle(); + + protected abstract WriteOptions writeOptions(); + + private void scheduleCompact() { + if (state() != State.Opening) { + return; + } + spaceMetrics.compactionSchedCounter.increment(); + if (compacting.compareAndSet(false, true)) { + compactionExecutor.execute(spaceMetrics.compactionTimer.wrap(() -> { + logger.debug("KeyRange[{}] compaction start", id); + lastCompactAt = System.nanoTime(); + writeStats.reset(); + try (CompactRangeOptions options = new CompactRangeOptions() + .setBottommostLevelCompaction(CompactRangeOptions.BottommostLevelCompaction.kSkip) + .setExclusiveManualCompaction(false)) { + synchronized (compacting) { + if (state() == State.Opening) { + IRocksDBKVSpaceEpoch handle = handle(); + handle.db().compactRange(handle.cf(), null, null, options); + } + } + logger.debug("KeyRange[{}] compacted", id); + } catch (Throwable e) { + logger.error("KeyRange[{}] compaction error", id, e); + } finally { + compacting.set(false); + if (nextCompactAt > lastCompactAt) { + scheduleCompact(); + } + } + })); + } else { + nextCompactAt = System.nanoTime(); + } + } + + @Override + protected long doSize(Boundary boundary) { + if (state() != State.Opening) { + return 0; + } + return RocksDBHelper.sizeOfBoundary(handle(), boundary); + } + + private class SpaceMetrics { + private final Counter compactionSchedCounter; + private final Timer compactionTimer; + + SpaceMetrics(Tags metricTags) { + compactionSchedCounter = getCounter(id, RocksDBKVSpaceMetric.ManualCompactionCounter, metricTags); + compactionTimer = getTimer(id, RocksDBKVSpaceMetric.ManualCompactionTimer, metricTags); + } + + void close() { + compactionSchedCounter.close(); + compactionTimer.close(); + 
} + } +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCheckpoint.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCheckpoint.java similarity index 66% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCheckpoint.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCheckpoint.java index dbdb9c5ec..83b6e395e 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCheckpoint.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCheckpoint.java @@ -19,10 +19,9 @@ package org.apache.bifromq.basekv.localengine.rocksdb; -import static com.google.protobuf.UnsafeByteOperations.unsafeWrap; import static org.apache.bifromq.basekv.localengine.IKVEngine.DEFAULT_NS; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.toMetaKey; -import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBKVSpace.deleteDir; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBHelper.deleteDir; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBHelper.getMetadata; import com.google.protobuf.ByteString; import java.io.File; @@ -30,14 +29,11 @@ import java.lang.ref.Cleaner; import java.util.ArrayList; import java.util.List; -import java.util.Optional; +import java.util.Map; import java.util.function.Predicate; -import java.util.function.Supplier; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.ISyncContext; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; import org.apache.bifromq.basekv.localengine.KVEngineException; import 
org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import org.apache.bifromq.basekv.proto.Boundary; import org.rocksdb.BlockBasedTableConfig; import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; @@ -46,14 +42,17 @@ import org.rocksdb.RocksDBException; import org.slf4j.Logger; -class RocksDBKVSpaceCheckpoint extends RocksDBKVSpaceReader implements IRocksDBKVSpaceCheckpoint { +class RocksDBKVSpaceCheckpoint implements IRocksDBKVSpaceCheckpoint { private static final Cleaner CLEANER = Cleaner.create(); + private final String id; + private final KVSpaceOpMeters opMeters; + private final Logger logger; private final String cpId; private final DBOptions dbOptions; - private final RocksDB roDB; private final ColumnFamilyDescriptor cfDesc; - private final ColumnFamilyHandle cfHandle; private final Cleaner.Cleanable cleanable; + private final IRocksDBKVSpaceEpoch handle; + private final Map metadata; RocksDBKVSpaceCheckpoint(String id, String cpId, @@ -61,7 +60,9 @@ class RocksDBKVSpaceCheckpoint extends RocksDBKVSpaceReader implements IRocksDBK Predicate isLatest, KVSpaceOpMeters opMeters, Logger logger) { - super(id, opMeters, logger); + this.id = id; + this.opMeters = opMeters; + this.logger = logger; this.cpId = cpId; try { cfDesc = new ColumnFamilyDescriptor(DEFAULT_NS.getBytes()); @@ -73,9 +74,20 @@ class RocksDBKVSpaceCheckpoint extends RocksDBKVSpaceReader implements IRocksDBK List cfDescs = List.of(cfDesc); List handles = new ArrayList<>(); - roDB = RocksDB.openReadOnly(dbOptions, cpDir.getAbsolutePath(), cfDescs, handles); + RocksDB roDB = RocksDB.openReadOnly(dbOptions, cpDir.getAbsolutePath(), cfDescs, handles); - cfHandle = handles.get(0); + ColumnFamilyHandle cfHandle = handles.get(0); + handle = new IRocksDBKVSpaceEpoch() { + @Override + public RocksDB db() { + return roDB; + } + + @Override + public ColumnFamilyHandle cf() { + return cfHandle; + } + }; cleanable = CLEANER.register(this, new ClosableResources(id, 
cpId, cpDir, @@ -85,6 +97,7 @@ class RocksDBKVSpaceCheckpoint extends RocksDBKVSpaceReader implements IRocksDBK dbOptions, isLatest, this.logger)); + metadata = getMetadata(handle); } catch (RocksDBException e) { throw new KVEngineException("Failed to open checkpoint", e); } @@ -97,16 +110,8 @@ public String cpId() { } @Override - protected Optional doMetadata(ByteString metaKey) { - try { - byte[] valBytes = roDB.get(cfHandle(), toMetaKey(metaKey)); - if (valBytes == null) { - return Optional.empty(); - } - return Optional.of(unsafeWrap(valBytes)); - } catch (RocksDBException e) { - throw new KVEngineException("Failed to read metadata", e); - } + public IKVSpaceReader newReader() { + return new RocksDBKVSpaceCheckpointReader(id, opMeters, logger, this, handle, metadata); } @Override @@ -114,42 +119,6 @@ public void close() { cleanable.clean(); } - @Override - protected RocksDB db() { - return roDB; - } - - @Override - protected ColumnFamilyHandle cfHandle() { - return cfHandle; - } - - @Override - protected ISyncContext.IRefresher newRefresher() { - return new ISyncContext.IRefresher() { - - @Override - public void runIfNeeded(Runnable runnable) { - // no need to do any refresh, since it's readonly - } - - @Override - public T call(Supplier supplier) { - return supplier.get(); - } - }; - } - - @Override - protected IKVSpaceIterator doNewIterator() { - return new RocksDBKVSpaceIterator(db(), cfHandle(), null, Boundary.getDefaultInstance(), newRefresher(), false); - } - - @Override - protected IKVSpaceIterator doNewIterator(Boundary subBoundary) { - return new RocksDBKVSpaceIterator(db(), cfHandle(), null, subBoundary, newRefresher(), false); - } - private record ClosableResources( String id, String cpId, diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCheckpointReader.java 
b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCheckpointReader.java new file mode 100644 index 000000000..9c689a848 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCheckpointReader.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import com.google.protobuf.ByteString; +import java.util.Map; +import java.util.Optional; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.proto.Boundary; +import org.slf4j.Logger; + +class RocksDBKVSpaceCheckpointReader extends AbstractRocksDBKVSpaceReader { + private final IRocksDBKVSpaceEpoch checkpoint; + private final Map metadata; + // Hold a strong reference to prevent premature GC of checkpoint + private RocksDBKVSpaceCheckpoint strongRef; + + RocksDBKVSpaceCheckpointReader(String id, + KVSpaceOpMeters opMeters, + Logger logger, + RocksDBKVSpaceCheckpoint strongRef, + IRocksDBKVSpaceEpoch checkpoint, + Map metadata) { + super(id, opMeters, logger); + // keep strong reference during reader lifetime + this.strongRef = strongRef; + this.checkpoint = checkpoint; + this.metadata = metadata; + } + + @Override + protected IRocksDBKVSpaceEpoch handle() { + return checkpoint; + } + + @Override + protected RocksDBSnapshot snapshot() { + return new RocksDBSnapshot(this.handle(), null); + } + + @Override + protected Optional doMetadata(ByteString metaKey) { + return Optional.ofNullable(metadata.get(metaKey)); + } + + @Override + protected IKVSpaceIterator doNewIterator(Boundary subBoundary) { + return new RocksDBKVSpaceIterator(snapshot(), subBoundary, new IteratorOptions(false, 52428)); + } + + @Override + public void close() { + // release strong reference to allow GC of checkpoint + strongRef = null; + } +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCompactionTrigger.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCompactionTrigger.java similarity index 100% rename from 
base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCompactionTrigger.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCompactionTrigger.java index fe157e6e4..09c4ac2d4 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCompactionTrigger.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCompactionTrigger.java @@ -19,11 +19,11 @@ package org.apache.bifromq.basekv.localengine.rocksdb; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters; -import org.apache.bifromq.basekv.localengine.rocksdb.metrics.RocksDBKVSpaceMetric; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Tags; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters; +import org.apache.bifromq.basekv.localengine.rocksdb.metrics.RocksDBKVSpaceMetric; class RocksDBKVSpaceCompactionTrigger implements IWriteStatsRecorder { private final AtomicInteger totalKeyCount = new AtomicInteger(); diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceEpochHandle.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceEpochHandle.java new file mode 100644 index 000000000..c0a9afde8 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceEpochHandle.java @@ -0,0 +1,401 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters.getFunctionCounter; +import static org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters.getFunctionTimer; +import static org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters.getGauge; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBHelper.deleteDir; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBHelper.openDBInDir; + +import com.google.protobuf.Struct; +import io.micrometer.core.instrument.FunctionCounter; +import io.micrometer.core.instrument.FunctionTimer; +import io.micrometer.core.instrument.Gauge; +import io.micrometer.core.instrument.Tags; +import java.io.File; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.function.Function; +import java.util.function.Predicate; +import org.apache.bifromq.basekv.localengine.rocksdb.metrics.RocksDBKVSpaceMetric; +import org.rocksdb.BlockBasedTableConfig; +import org.rocksdb.Checkpoint; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.ColumnFamilyHandle; +import org.rocksdb.ColumnFamilyOptions; +import org.rocksdb.DBOptions; +import org.rocksdb.HistogramType; +import org.rocksdb.RocksDB; +import org.rocksdb.RocksDBException; +import org.rocksdb.Statistics; +import 
org.rocksdb.TickerType; +import org.slf4j.Logger; + +abstract class RocksDBKVSpaceEpochHandle implements IRocksDBKVSpaceEpochHandle { + protected final Logger logger; + final DBOptions dbOptions; + final ColumnFamilyDescriptor cfDesc; + final RocksDB db; + final ColumnFamilyHandle cf; + final File dir; + final Checkpoint checkpoint; + + RocksDBKVSpaceEpochHandle(File dir, Struct conf, Logger logger) { + this.dbOptions = buildDBOptions(conf); + this.cfDesc = buildCFDescriptor(conf); + RocksDBHelper.RocksDBHandle dbHandle = openDBInDir(dir, dbOptions, cfDesc); + this.db = dbHandle.db(); + this.cf = dbHandle.cf(); + this.dir = dir; + this.checkpoint = Checkpoint.create(db()); + this.logger = logger; + } + + @Override + public RocksDB db() { + return db; + } + + @Override + public ColumnFamilyHandle cf() { + return cf; + } + + protected abstract DBOptions buildDBOptions(Struct conf); + + protected abstract ColumnFamilyDescriptor buildCFDescriptor(Struct conf); + + protected record ClosableResources(String id, + String genId, + DBOptions dbOptions, + ColumnFamilyDescriptor cfDesc, + ColumnFamilyHandle cfHandle, + RocksDB db, + Checkpoint checkpoint, + File dir, + Predicate isRetired, + SpaceMetrics metrics, + Logger log) implements Runnable { + @Override + public void run() { + // Ensure no metric suppliers call into RocksDB during close + try (AutoCloseable guard = metrics.beginClose()) { + metrics.close(); + log.debug("Clean up generation[{}] of kvspace[{}]", genId, id); + // Close checkpoint before DB resources + checkpoint.close(); + try { + db.destroyColumnFamilyHandle(cfHandle); + } catch (Throwable e) { + log.error("Failed to destroy column family handle of generation[{}] for kvspace[{}]", genId, id, e); + } + try { + db.close(); + } catch (Throwable e) { + log.error("Failed to close RocksDB of generation[{}] for kvspace[{}]", genId, id, e); + } + cfDesc.getOptions().close(); + dbOptions.close(); + if (isRetired.test(genId)) { + log.debug("delete retired 
generation[{}] of kvspace[{}] in path: {}", genId, id, + dir.getAbsolutePath()); + try { + deleteDir(dir.toPath()); + } catch (Throwable e) { + log.error("Failed to clean retired generation at path:{}", dir, e); + } + } + } catch (Exception ignored) { + // ignore + } + } + } + + protected static class SpaceMetrics { + private final ReentrantReadWriteLock rw = new ReentrantReadWriteLock(); + private final Logger logger; + private final Gauge blockCacheSizeGauge; + private final Gauge tableReaderSizeGauge; + private final Gauge memTableSizeGauges; + private final Gauge pinedMemorySizeGauge; + private final Gauge totalSSTFileSizeGauge; + private final Gauge liveSSTFileSizeGauge; + private final Gauge liveDataSizeGauge; + private final Gauge estimateNumKeysGauge; + private final Gauge pendingCompactionBytesGauge; + private final Gauge numRunningCompactionsGauge; + private final Gauge numRunningFlushesGauge; + private final Gauge compactionPendingGauge; + private final Gauge memtableFlushPendingGauge; + private final Gauge backgroundErrorsGauge; + private final FunctionCounter bytesReadCounter; + private final FunctionCounter bytesWrittenCounter; + private final FunctionCounter blockCacheHitCounter; + private final FunctionCounter blockCacheMissCounter; + private final FunctionCounter blobCacheHitCounter; + private final FunctionCounter blobCacheMissCounter; + private final FunctionCounter bloomUsefulCounter; + private final FunctionTimer getLatencyTimer; + private final FunctionTimer writeLatencyTimer; + private final FunctionTimer seekLatencyTimer; + private final FunctionTimer blobGetLatencyTimer; + private final FunctionTimer blobWriteLatencyTimer; + private final FunctionTimer sstReadLatencyTimer; + private final FunctionTimer sstWriteLatencyTimer; + private final FunctionTimer flushLatencyTimer; + private final FunctionTimer compactionLatencyTimer; + private final FunctionTimer writeStallTimer; + private final Statistics statistics; + private volatile boolean 
closed = false; + + SpaceMetrics(String id, + RocksDB db, + DBOptions dbOptions, + ColumnFamilyHandle cfHandle, + ColumnFamilyOptions cfOptions, + Tags metricTags, + Logger logger) { + this.logger = logger; + this.statistics = dbOptions.statistics(); + blockCacheSizeGauge = getGauge(id, RocksDBKVSpaceMetric.BlockCache, () -> { + BlockBasedTableConfig cfg = (BlockBasedTableConfig) cfOptions.tableFormatConfig(); + if (!cfg.noBlockCache()) { + return safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.block-cache-usage")); + } + return 0L; + }, metricTags); + tableReaderSizeGauge = getGauge(id, RocksDBKVSpaceMetric.TableReader, + () -> safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.estimate-table-readers-mem")), metricTags); + memTableSizeGauges = getGauge(id, RocksDBKVSpaceMetric.MemTable, + () -> safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.cur-size-all-mem-tables")), metricTags); + pinedMemorySizeGauge = getGauge(id, RocksDBKVSpaceMetric.PinnedMem, () -> { + BlockBasedTableConfig cfg = (BlockBasedTableConfig) cfOptions.tableFormatConfig(); + if (!cfg.noBlockCache()) { + return safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.block-cache-pinned-usage")); + } + return 0L; + }, metricTags); + totalSSTFileSizeGauge = getGauge(id, RocksDBKVSpaceMetric.StateTotalSSTSize, + () -> safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.total-sst-files-size")), metricTags); + liveSSTFileSizeGauge = getGauge(id, RocksDBKVSpaceMetric.StateLiveSSTSize, + () -> safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.live-sst-files-size")), metricTags); + liveDataSizeGauge = getGauge(id, RocksDBKVSpaceMetric.StateLiveDataSize, + () -> safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.estimate-live-data-size")), metricTags); + estimateNumKeysGauge = getGauge(id, RocksDBKVSpaceMetric.StateEstimateNumKeys, + () -> safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.estimate-num-keys")), metricTags); + pendingCompactionBytesGauge = getGauge(id, 
RocksDBKVSpaceMetric.StatePendingCompactionBytes, + () -> safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.estimate-pending-compaction-bytes")), + metricTags); + numRunningCompactionsGauge = getGauge(id, RocksDBKVSpaceMetric.StateRunningCompactions, + () -> safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.num-running-compactions")), metricTags); + numRunningFlushesGauge = getGauge(id, RocksDBKVSpaceMetric.StateRunningFlushes, + () -> safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.num-running-flushes")), metricTags); + compactionPendingGauge = getGauge(id, RocksDBKVSpaceMetric.StateCompactionPending, + () -> safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.compaction-pending")), metricTags); + memtableFlushPendingGauge = getGauge(id, RocksDBKVSpaceMetric.StateMemTableFlushPending, + () -> safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.mem-table-flush-pending")), metricTags); + backgroundErrorsGauge = getGauge(id, RocksDBKVSpaceMetric.StateBackgroundErrors, + () -> safeGet(() -> db.getLongProperty(cfHandle, "rocksdb.background-errors")), metricTags); + + // Statistics-based meters + if (statistics != null) { + bytesReadCounter = getFunctionCounter(id, RocksDBKVSpaceMetric.IOBytesReadCounter, statistics, + stats -> safeGetDouble(stats, (s) -> (double) s.getTickerCount(TickerType.BYTES_READ)), + metricTags); + bytesWrittenCounter = getFunctionCounter(id, RocksDBKVSpaceMetric.IOBytesWrittenCounter, statistics, + stats -> safeGetDouble(stats, (s) -> (double) stats.getTickerCount(TickerType.BYTES_WRITTEN)), + metricTags); + blockCacheHitCounter = getFunctionCounter(id, RocksDBKVSpaceMetric.BlockCacheHitCounter, statistics, + stats -> safeGetDouble(stats, (s) -> (double) stats.getTickerCount(TickerType.BLOCK_CACHE_HIT)), + metricTags); + blockCacheMissCounter = getFunctionCounter(id, RocksDBKVSpaceMetric.BlockCacheMissCounter, statistics, + stats -> safeGetDouble(stats, (s) -> (double) stats.getTickerCount(TickerType.BLOCK_CACHE_MISS)), + metricTags); + 
blobCacheHitCounter = getFunctionCounter(id, RocksDBKVSpaceMetric.BlobDBCacheHitCounter, statistics, + stats -> safeGetDouble(stats, (s) -> (double) stats.getTickerCount(TickerType.BLOB_DB_CACHE_HIT)), + metricTags); + blobCacheMissCounter = getFunctionCounter(id, RocksDBKVSpaceMetric.BlobDBCacheMissCounter, statistics, + stats -> safeGetDouble(stats, (s) -> (double) stats.getTickerCount(TickerType.BLOB_DB_CACHE_MISS)), + metricTags); + bloomUsefulCounter = getFunctionCounter(id, RocksDBKVSpaceMetric.BloomUsefulCounter, statistics, + stats -> safeGetDouble(stats, (s) -> (double) stats.getTickerCount(TickerType.BLOOM_FILTER_USEFUL)), + metricTags); + + getLatencyTimer = getFunctionTimer(id, RocksDBKVSpaceMetric.GetLatency, statistics, + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.DB_GET).getCount()), + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.DB_GET).getSum()), + TimeUnit.MICROSECONDS, + metricTags); + writeLatencyTimer = getFunctionTimer(id, RocksDBKVSpaceMetric.WriteLatency, statistics, + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.DB_WRITE).getCount()), + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.DB_WRITE).getSum()), + TimeUnit.MICROSECONDS, + metricTags); + seekLatencyTimer = getFunctionTimer(id, RocksDBKVSpaceMetric.SeekLatency, statistics, + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.DB_SEEK).getCount()), + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.DB_SEEK).getSum()), + TimeUnit.MICROSECONDS, + metricTags); + blobGetLatencyTimer = getFunctionTimer(id, RocksDBKVSpaceMetric.GetLatency, statistics, + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.BLOB_DB_GET_MICROS).getCount()), + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.BLOB_DB_GET_MICROS).getSum()), + TimeUnit.MICROSECONDS, + metricTags); + blobWriteLatencyTimer = getFunctionTimer(id, RocksDBKVSpaceMetric.WriteLatency, statistics, 
+ stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.BLOB_DB_WRITE_MICROS).getCount()), + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.BLOB_DB_WRITE_MICROS).getSum()), + TimeUnit.MICROSECONDS, + metricTags); + sstReadLatencyTimer = getFunctionTimer(id, RocksDBKVSpaceMetric.SSTReadLatency, statistics, + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.SST_READ_MICROS).getCount()), + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.SST_READ_MICROS).getSum()), + TimeUnit.MICROSECONDS, + metricTags); + sstWriteLatencyTimer = getFunctionTimer(id, RocksDBKVSpaceMetric.SSTWriteLatency, statistics, + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.SST_WRITE_MICROS).getCount()), + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.SST_WRITE_MICROS).getSum()), + TimeUnit.MICROSECONDS, + metricTags); + flushLatencyTimer = getFunctionTimer(id, RocksDBKVSpaceMetric.FlushLatency, statistics, + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.FLUSH_TIME).getCount()), + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.FLUSH_TIME).getSum()), + TimeUnit.MICROSECONDS, + metricTags); + compactionLatencyTimer = getFunctionTimer(id, RocksDBKVSpaceMetric.CompactionLatency, statistics, + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.COMPACTION_TIME).getCount()), + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.COMPACTION_TIME).getSum()), + TimeUnit.MICROSECONDS, + metricTags); + writeStallTimer = getFunctionTimer(id, RocksDBKVSpaceMetric.WriteStall, statistics, + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.WRITE_STALL).getCount()), + stats -> safeGetLong(stats, s -> s.getHistogramData(HistogramType.WRITE_STALL).getSum()), + TimeUnit.MICROSECONDS, + metricTags); + } else { + bytesReadCounter = null; + bytesWrittenCounter = null; + blockCacheHitCounter = null; + blockCacheMissCounter = null; + 
blobCacheHitCounter = null; + blobCacheMissCounter = null; + bloomUsefulCounter = null; + getLatencyTimer = null; + writeLatencyTimer = null; + seekLatencyTimer = null; + blobGetLatencyTimer = null; + blobWriteLatencyTimer = null; + sstReadLatencyTimer = null; + sstWriteLatencyTimer = null; + flushLatencyTimer = null; + compactionLatencyTimer = null; + writeStallTimer = null; + } + } + + private double safeGetDouble(T obj, Function func) { + return safeGet(obj, func, 0d); + } + + private long safeGetLong(T obj, Function func) { + return safeGet(obj, func, 0L); + } + + private R safeGet(T obj, Function func, R defVal) { + ReentrantReadWriteLock.ReadLock rl = rw.readLock(); + rl.lock(); + try { + if (closed) { + return defVal; + } + return func.apply(obj); + } catch (Throwable t) { + logger.warn("Unable to read RocksDB metric", t); + return defVal; + } finally { + rl.unlock(); + } + } + + private long safeGet(RocksDBLongGetter action) { + ReentrantReadWriteLock.ReadLock rl = rw.readLock(); + rl.lock(); + try { + if (closed) { + return 0L; + } + return action.get(); + } catch (Throwable t) { + logger.warn("Unable to read RocksDB metric", t); + return 0L; + } finally { + rl.unlock(); + } + } + + AutoCloseable beginClose() { + ReentrantReadWriteLock.WriteLock wl = rw.writeLock(); + wl.lock(); + closed = true; + return wl::unlock; + } + + void close() { + blockCacheSizeGauge.close(); + memTableSizeGauges.close(); + tableReaderSizeGauge.close(); + pinedMemorySizeGauge.close(); + totalSSTFileSizeGauge.close(); + liveSSTFileSizeGauge.close(); + liveDataSizeGauge.close(); + estimateNumKeysGauge.close(); + pendingCompactionBytesGauge.close(); + numRunningCompactionsGauge.close(); + numRunningFlushesGauge.close(); + compactionPendingGauge.close(); + memtableFlushPendingGauge.close(); + backgroundErrorsGauge.close(); + if (statistics != null) { + bytesReadCounter.close(); + bytesWrittenCounter.close(); + blockCacheHitCounter.close(); + blockCacheMissCounter.close(); + 
blobCacheHitCounter.close(); + blobCacheMissCounter.close(); + bloomUsefulCounter.close(); + getLatencyTimer.close(); + writeLatencyTimer.close(); + seekLatencyTimer.close(); + blobGetLatencyTimer.close(); + blobWriteLatencyTimer.close(); + sstReadLatencyTimer.close(); + sstWriteLatencyTimer.close(); + flushLatencyTimer.close(); + compactionLatencyTimer.close(); + writeStallTimer.close(); + statistics.close(); + } + } + + interface RocksDBLongGetter { + long get() throws RocksDBException; + } + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceIterator.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceIterator.java new file mode 100644 index 000000000..2b8c472da --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceIterator.java @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static com.google.protobuf.UnsafeByteOperations.unsafeWrap; +import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.DATA_SECTION_END; +import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.DATA_SECTION_START; +import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.fromDataKey; +import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.toDataKey; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.endKeyBytes; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.startKeyBytes; + +import com.google.protobuf.ByteString; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.KVEngineException; +import org.apache.bifromq.basekv.proto.Boundary; +import org.rocksdb.ReadOptions; +import org.rocksdb.RocksIterator; +import org.rocksdb.Slice; + +@Slf4j +class RocksDBKVSpaceIterator implements IKVSpaceIterator { + private final byte[] startKey; + private final byte[] endKey; + private final IteratorOptions options; + private final AtomicReference rocksItrHolder = new AtomicReference<>(); + private final AtomicBoolean closed = new AtomicBoolean(); + private final CloseListener closeListener; + + public RocksDBKVSpaceIterator(RocksDBSnapshot snapshot, Boundary boundary, IteratorOptions options) { + this(snapshot, boundary, itr -> { + }, options); + } + + public RocksDBKVSpaceIterator(RocksDBSnapshot snapshot, + Boundary boundary, + CloseListener closeListener, + IteratorOptions options) { + byte[] boundaryStartKey = startKeyBytes(boundary); + byte[] boundaryEndKey = endKeyBytes(boundary); + startKey = boundaryStartKey != null ? toDataKey(boundaryStartKey) : DATA_SECTION_START; + endKey = boundaryEndKey != null ? 
toDataKey(boundaryEndKey) : DATA_SECTION_END; + this.options = options; + this.closeListener = closeListener; + refresh(snapshot); + } + + @Override + public ByteString key() { + return fromDataKey(rocksItrHolder.get().rocksIterator.key()); + } + + @Override + public ByteString value() { + return unsafeWrap(rocksItrHolder.get().rocksIterator.value()); + } + + @Override + public boolean isValid() { + return rocksItrHolder.get().rocksIterator.isValid(); + } + + @Override + public void next() { + rocksItrHolder.get().rocksIterator.next(); + } + + @Override + public void prev() { + rocksItrHolder.get().rocksIterator.prev(); + } + + @Override + public void seekToFirst() { + rocksItrHolder.get().rocksIterator.seekToFirst(); + } + + @Override + public void seekToLast() { + rocksItrHolder.get().rocksIterator.seekToLast(); + } + + @Override + public void seek(ByteString target) { + rocksItrHolder.get().rocksIterator.seek(toDataKey(target)); + } + + @Override + public void seekForPrev(ByteString target) { + rocksItrHolder.get().rocksIterator.seekForPrev(toDataKey(target)); + } + + public void refresh(RocksDBSnapshot snapshot) { + if (closed.get()) { + return; + } + try { + RocksDBItrHolder rocksItrHolder = this.rocksItrHolder.get(); + if (rocksItrHolder == null) { + this.rocksItrHolder.set(build(snapshot)); + } else if (rocksItrHolder.epoch == snapshot.epoch()) { + // same epoch, just refresh the snapshot + rocksItrHolder.rocksIterator.refresh(snapshot.snapshot()); + } else { + this.rocksItrHolder.set(build(snapshot)); + rocksItrHolder.close(); + } + } catch (Throwable e) { + throw new KVEngineException("Unable to refresh iterator", e); + } + } + + @Override + public void close() { + if (closed.compareAndSet(false, true)) { + try { + RocksDBItrHolder rocksItrHolder = this.rocksItrHolder.getAndSet(null); + if (rocksItrHolder != null) { + rocksItrHolder.close(); + } + } finally { + closeListener.onClose(this); + } + } + } + + private RocksDBItrHolder build(RocksDBSnapshot 
snapshot) { + ReadOptions readOptions = new ReadOptions() + .setPinData(true) + .setFillCache(options.fillCache()) + .setReadaheadSize(options.readAheadSize()) + .setAutoPrefixMode(true); + Slice lowerSlice = new Slice(startKey); + readOptions.setIterateLowerBound(lowerSlice); + Slice upperSlice = new Slice(endKey); + readOptions.setIterateUpperBound(upperSlice); + if (snapshot != null) { + readOptions.setSnapshot(snapshot.snapshot()); + } + RocksIterator rocksItr = snapshot.epoch().db().newIterator(snapshot.epoch().cf(), readOptions); + return new RocksDBItrHolder(snapshot.epoch(), rocksItr, readOptions, lowerSlice, upperSlice); + } + + interface CloseListener { + void onClose(RocksDBKVSpaceIterator itr); + } + + /** + * Holder for RocksDB iterator and its resources; close is idempotent. + */ + private static final class RocksDBItrHolder { + final IRocksDBKVSpaceEpoch epoch; + final RocksIterator rocksIterator; + final ReadOptions readOptions; + final Slice lowerSlice; + final Slice upperSlice; + private final AtomicBoolean closed = new AtomicBoolean(); + + RocksDBItrHolder(IRocksDBKVSpaceEpoch epoch, + RocksIterator rocksIterator, + ReadOptions readOptions, + Slice lowerSlice, + Slice upperSlice) { + this.epoch = epoch; + this.rocksIterator = rocksIterator; + this.readOptions = readOptions; + this.lowerSlice = lowerSlice; + this.upperSlice = upperSlice; + } + + public void close() { + // Ensure native resources are freed exactly once + if (closed.compareAndSet(false, true)) { + rocksIterator.close(); + readOptions.close(); + if (lowerSlice != null) { + lowerSlice.close(); + } + if (upperSlice != null) { + upperSlice.close(); + } + } + } + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceMigratableWriter.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceMigratableWriter.java new file mode 100644 index 
000000000..8c961eb76 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceMigratableWriter.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import com.google.protobuf.ByteString; +import java.util.Map; +import java.util.function.Consumer; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.IKVSpaceMigratableWriter; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.apache.bifromq.basekv.localengine.ISyncContext; +import org.apache.bifromq.basekv.localengine.KVEngineException; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.proto.Boundary; +import org.rocksdb.WriteOptions; +import org.slf4j.Logger; + +class RocksDBKVSpaceMigratableWriter extends RocksDBKVSpaceWriter + implements IKVSpaceMigratableWriter { + + RocksDBKVSpaceMigratableWriter(String id, + IRocksDBKVSpaceEpochHandle dbHandle, + RocksDBKVEngine engine, + WriteOptions writeOptions, + ISyncContext syncContext, + IWriteStatsRecorder.IRecorder writeStatsRecorder, + Consumer> afterWrite, + KVSpaceOpMeters opMeters, + Logger logger) { + super(id, dbHandle, engine, writeOptions, syncContext, writeStatsRecorder, afterWrite, opMeters, logger); + } + + @Override + public IRestoreSession migrateTo(String targetSpaceId, Boundary boundary) { + try { + RocksDBCPableKVSpace targetKVSpace = (RocksDBCPableKVSpace) engine.createIfMissing(targetSpaceId); + IRestoreSession targetSpaceRestoreSession = targetKVSpace.startRestore((count, bytes) -> + logger.debug("Migrate {} kv to space[{}] from space[{}]: startKey={}, endKey={}", + count, targetSpaceId, id, boundary.getStartKey().toStringUtf8(), + boundary.getEndKey().toStringUtf8())); + try (IKVSpaceIterator itr = new RocksDBKVSpaceIterator(new RocksDBSnapshot(dbHandle, null), boundary, + new IteratorOptions(false, 52428))) { + for (itr.seekToFirst(); itr.isValid(); itr.next()) { + targetSpaceRestoreSession.put(itr.key(), itr.value()); + } + } + // clear moved data in left range + helper.clear(dbHandle.cf(), 
boundary); + return targetSpaceRestoreSession; + } catch (Throwable e) { + throw new KVEngineException("Delete range in batch failed", e); + } + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceReader.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceReader.java new file mode 100644 index 000000000..2e710bde3 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceReader.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.utils.BoundaryUtil.isValid; + +import com.google.common.collect.Sets; +import com.google.protobuf.ByteString; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; +import org.apache.bifromq.basekv.localengine.ISyncContext; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.proto.Boundary; +import org.slf4j.Logger; + +class RocksDBKVSpaceReader extends AbstractRocksDBKVSpaceReader implements IKVSpaceRefreshableReader { + private final ISyncContext.IRefresher refresher; + private final Supplier dbSupplier; + private final Supplier> metadataSupplier; + private final AtomicReference snapshot = new AtomicReference<>(); + private final Set openedIterators = Sets.newConcurrentHashSet(); + private final IteratorOptions iteratorOptions; + + RocksDBKVSpaceReader(String id, + KVSpaceOpMeters opMeters, + Logger logger, + ISyncContext.IRefresher refresher, + Supplier dbSupplier, + Supplier> metadataSupplier, + IteratorOptions iteratorOptions) { + super(id, opMeters, logger); + this.refresher = refresher; + this.dbSupplier = dbSupplier; + this.metadataSupplier = metadataSupplier; + this.snapshot.set(RocksDBSnapshot.take(dbSupplier.get())); + this.iteratorOptions = iteratorOptions; + } + + @Override + public void refresh() { + refresher.runIfNeeded((genBumped) -> { + snapshot.getAndSet(RocksDBSnapshot.take(dbSupplier.get())).release(); + openedIterators.forEach(itr -> itr.refresh(snapshot.get())); + }); + } + + @Override + public void close() { + openedIterators.forEach(RocksDBKVSpaceIterator::close); + RocksDBSnapshot oldSnapshot = snapshot.getAndSet(null); + 
oldSnapshot.release(); + } + + @Override + protected IRocksDBKVSpaceEpoch handle() { + return dbSupplier.get(); + } + + @Override + protected RocksDBSnapshot snapshot() { + return snapshot.get(); + } + + @Override + protected Optional doMetadata(ByteString metaKey) { + return refresher.call(() -> { + Map metaMap = metadataSupplier.get(); + return Optional.ofNullable(metaMap.get(metaKey)); + }); + } + + @Override + protected IKVSpaceIterator doNewIterator(Boundary subBoundary) { + assert isValid(subBoundary); + RocksDBKVSpaceIterator itr = new RocksDBKVSpaceIterator(snapshot(), subBoundary, + openedIterators::remove, iteratorOptions); + openedIterators.add(itr); + return itr; + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceWriter.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceWriter.java new file mode 100644 index 000000000..a907d8c3f --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceWriter.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import com.google.protobuf.ByteString; +import java.util.Map; +import java.util.function.Consumer; +import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; +import org.apache.bifromq.basekv.localengine.ISyncContext; +import org.apache.bifromq.basekv.localengine.KVEngineException; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.proto.Boundary; +import org.rocksdb.RocksDBException; +import org.rocksdb.WriteOptions; +import org.slf4j.Logger; + +class RocksDBKVSpaceWriter implements IKVSpaceWriter { + protected final String id; + protected final KVSpaceOpMeters opMeters; + protected final Logger logger; + protected final RocksDBKVEngine engine; + protected final RocksDBKVSpaceWriterHelper helper; + protected final IRocksDBKVSpaceEpochHandle dbHandle; + private final IWriteStatsRecorder.IRecorder writeStatsRecorder; + + RocksDBKVSpaceWriter(String id, + IRocksDBKVSpaceEpochHandle dbHandle, + RocksDBKVEngine engine, + WriteOptions writeOptions, + ISyncContext syncContext, + IWriteStatsRecorder.IRecorder writeStatsRecorder, + Consumer> afterWrite, + KVSpaceOpMeters opMeters, + Logger logger) { + this(id, dbHandle, engine, syncContext, new RocksDBKVSpaceWriterHelper(dbHandle.db(), writeOptions), + writeStatsRecorder, afterWrite, opMeters, logger); + } + + private RocksDBKVSpaceWriter(String id, + IRocksDBKVSpaceEpochHandle dbHandle, + RocksDBKVEngine engine, + ISyncContext syncContext, + RocksDBKVSpaceWriterHelper writerHelper, + IWriteStatsRecorder.IRecorder writeStatsRecorder, + Consumer> afterWrite, + KVSpaceOpMeters opMeters, + Logger logger) { + this.id = id; + this.opMeters = opMeters; + this.logger = logger; + this.dbHandle = dbHandle; + this.engine = engine; + this.helper = writerHelper; + this.writeStatsRecorder = writeStatsRecorder; + writerHelper.addMutator(syncContext.mutator()); + writerHelper.addAfterWriteCallback(dbHandle.cf(), 
afterWrite); + } + + @Override + public String id() { + return id; + } + + @Override + public IKVSpaceWriter metadata(ByteString metaKey, ByteString metaValue) { + try { + helper.metadata(dbHandle.cf(), metaKey, metaValue); + return this; + } catch (RocksDBException e) { + throw new KVEngineException("Put in batch failed", e); + } + } + + @Override + public IKVSpaceWriter insert(ByteString key, ByteString value) { + try { + helper.insert(dbHandle.cf(), key, value); + writeStatsRecorder.recordInsert(); + return this; + } catch (RocksDBException e) { + throw new KVEngineException("Insert in batch failed", e); + } + } + + @Override + public IKVSpaceWriter put(ByteString key, ByteString value) { + try { + helper.put(dbHandle.cf(), key, value); + writeStatsRecorder.recordPut(); + return this; + } catch (RocksDBException e) { + throw new KVEngineException("Put in batch failed", e); + } + } + + @Override + public IKVSpaceWriter delete(ByteString key) { + try { + helper.delete(dbHandle.cf(), key); + writeStatsRecorder.recordDelete(); + return this; + } catch (RocksDBException e) { + throw new KVEngineException("Single delete in batch failed", e); + } + } + + @Override + public IKVSpaceWriter clear() { + return clear(Boundary.getDefaultInstance()); + } + + @Override + public IKVSpaceWriter clear(Boundary boundary) { + try { + helper.clear(dbHandle.cf(), boundary); + writeStatsRecorder.recordDeleteRange(); + } catch (Throwable e) { + throw new KVEngineException("Delete range in batch failed", e); + } + return this; + } + + @Override + public void done() { + opMeters.batchWriteCallTimer.record(() -> { + try { + opMeters.writeBatchSizeSummary.record(helper.count()); + helper.done(); + writeStatsRecorder.stop(); + } catch (Throwable e) { + logger.error("Write Batch commit failed", e); + throw new KVEngineException("Batch commit failed", e); + } + }); + } + + @Override + public void abort() { + helper.abort(); + } + + @Override + public int count() { + return helper.count(); + } 
+} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceWriterHelper.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceWriterHelper.java similarity index 87% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceWriterHelper.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceWriterHelper.java index ab39b7fdd..f486057d4 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceWriterHelper.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceWriterHelper.java @@ -57,7 +57,7 @@ class RocksDBKVSpaceWriterHelper { } - void addMutators(ISyncContext.IMutator mutator) { + void addMutator(ISyncContext.IMutator mutator) { mutators.add(mutator); } @@ -102,19 +102,14 @@ void flush() { if (batch.count() == 0) { return; } - if (hasPendingMetadata()) { - throw new IllegalStateException("Flush is not allowed when metadata changes exist"); - } - runInMutators(() -> { - try { - if (batch.count() > 0) { - db.write(writeOptions, batch); - batch.clear(); - } - } catch (Throwable e) { - throw new KVEngineException("Range write error", e); + try { + if (batch.count() > 0) { + db.write(writeOptions, batch); + batch.clear(); } - }); + } catch (Throwable e) { + throw new KVEngineException("Range write error", e); + } } void done() { @@ -131,6 +126,7 @@ void done() { batch.close(); } } + return false; }); for (ColumnFamilyHandle columnFamilyHandle : afterWriteCallbacks.keySet()) { Map updatedMetadata = metadataChanges.get(columnFamilyHandle); @@ -156,20 +152,20 @@ boolean hasPendingMetadata() { return metadataChanges.values().stream().anyMatch(map -> !map.isEmpty()); } - private void 
runInMutators(Runnable runnable) { + private void runInMutators(ISyncContext.IMutation mutation) { if (mutators.isEmpty()) { - runnable.run(); + mutation.mutate(); return; } - AtomicReference finalRun = new AtomicReference<>(); + AtomicReference finalRun = new AtomicReference<>(); for (ISyncContext.IMutator mutator : mutators) { if (finalRun.get() == null) { - finalRun.set(() -> mutator.run(runnable)); + finalRun.set(() -> mutator.run(mutation)); } else { - Runnable innerRun = finalRun.get(); + ISyncContext.IMutation innerRun = finalRun.get(); finalRun.set(() -> mutator.run(innerRun)); } } - finalRun.get().run(); + finalRun.get().mutate(); } } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngineConfigurator.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBOptionsUtil.java similarity index 55% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngineConfigurator.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBOptionsUtil.java index 6860e7820..96a5550c4 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVEngineConfigurator.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBOptionsUtil.java @@ -14,109 +14,67 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.localengine.rocksdb; +import static org.apache.bifromq.basekv.localengine.StructUtil.boolVal; +import static org.apache.bifromq.basekv.localengine.StructUtil.numVal; +import static org.apache.bifromq.basekv.localengine.StructUtil.strVal; import static org.apache.bifromq.basekv.localengine.rocksdb.AutoCleaner.autoRelease; -import static java.lang.Math.max; - -import org.apache.bifromq.baseenv.EnvProvider; -import org.apache.bifromq.basekv.localengine.IKVEngineConfigurator; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; -import lombok.experimental.Accessors; -import lombok.experimental.SuperBuilder; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.ASYNC_WAL_FLUSH; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.BLOCK_CACHE_SIZE; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.ENABLE_STATS; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.INCREASE_PARALLELISM; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.LEVEL0_FILE_NUM_COMPACTION_TRIGGER; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.LEVEL0_SLOWDOWN_WRITES_TRIGGER; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.LEVEL0_STOP_WRITES_TRIGGER; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.MAX_BACKGROUND_JOBS; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.MAX_BYTES_FOR_LEVEL_BASE; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.MAX_WRITE_BUFFER_NUMBER; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.MIN_BLOB_SIZE; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.MIN_WRITE_BUFFER_NUMBER_TO_MERGE; +import static 
org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.STATS_LEVEL; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.TARGET_FILE_SIZE_BASE; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.WRITE_BUFFER_SIZE; + +import com.google.protobuf.Struct; +import java.util.EnumSet; import org.rocksdb.BlockBasedTableConfig; import org.rocksdb.BloomFilter; +import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyOptions; -import org.rocksdb.ColumnFamilyOptionsInterface; import org.rocksdb.CompactionStyle; import org.rocksdb.CompressionType; import org.rocksdb.DBOptions; -import org.rocksdb.DBOptionsInterface; import org.rocksdb.DataBlockIndexType; import org.rocksdb.Env; +import org.rocksdb.HistogramType; import org.rocksdb.IndexType; import org.rocksdb.LRUCache; -import org.rocksdb.MutableColumnFamilyOptionsInterface; -import org.rocksdb.MutableDBOptionsInterface; import org.rocksdb.PrepopulateBlobCache; import org.rocksdb.RateLimiter; +import org.rocksdb.Statistics; +import org.rocksdb.StatsLevel; import org.rocksdb.util.SizeUnit; -@NoArgsConstructor -@Getter -@Setter -@Accessors(chain = true, fluent = true) -@SuperBuilder(toBuilder = true) -public abstract class RocksDBKVEngineConfigurator> - implements IKVEngineConfigurator { - private String dbRootDir; - @Builder.Default - private boolean heuristicCompaction = false; - @Builder.Default - private int compactMinTombstoneKeys = 50000; - @Builder.Default - private int compactMinTombstoneRanges = 10000; - @Builder.Default - private double compactTombstoneKeysRatio = 0.3; - @Builder.Default - private long blockCacheSize = 32 * SizeUnit.MB; - @Builder.Default - private long writeBufferSize = 128 * SizeUnit.MB; - @Builder.Default - private int maxWriteBufferNumber = 6; - @Builder.Default - private int minWriteBufferNumberToMerge = 2; - @Builder.Default - private long minBlobSize = 2 * SizeUnit.KB; - @Builder.Default - private int 
increaseParallelism = max(EnvProvider.INSTANCE.availableProcessors() / 4, 2); - @Builder.Default - private int maxBackgroundJobs = max(EnvProvider.INSTANCE.availableProcessors() / 4, 2); - @Builder.Default - private int level0FileNumCompactionTrigger = 8; - @Builder.Default - private int level0SlowdownWritesTrigger = 20; - @Builder.Default - private int level0StopWritesTrigger = 24; - @Builder.Default - private long maxBytesForLevelBase = 512 * SizeUnit.MB; - @Builder.Default - private long targetFileSizeBase = 64 * SizeUnit.MB; - - public DBOptions dbOptions() { - DBOptions targetOption = new DBOptions(); - configDBOptions((DBOptionsInterface) targetOption); - configDBOptions((MutableDBOptionsInterface) targetOption); - // we don't need atomic flush in both use cases - targetOption.setAtomicFlush(false); - return targetOption; - } - - public ColumnFamilyOptions cfOptions(String name) { - ColumnFamilyOptions targetOption = new ColumnFamilyOptions(); - configCFOptions(name, (ColumnFamilyOptionsInterface) targetOption); - configCFOptions(name, (MutableColumnFamilyOptionsInterface) targetOption); - return targetOption; - } - - protected void configDBOptions(DBOptionsInterface targetOption) { - targetOption - .setEnv(Env.getDefault()) +/** + * Build RocksDB options from Struct configuration. Keys align with provider schema. 
+ */ +final class RocksDBOptionsUtil { + private static DBOptions buildDBOptions(Struct conf) { + DBOptions opts = new DBOptions(); + opts.setEnv(Env.getDefault()) .setCreateIfMissing(true) .setCreateMissingColumnFamilies(true) .setAvoidUnnecessaryBlockingIO(true) .setMaxManifestFileSize(64 * SizeUnit.MB) - // log file settings - .setRecycleLogFileNum(4) + // info log file settings .setMaxLogFileSize(128 * SizeUnit.MB) .setKeepLogFileNum(4) // wal file settings + .setRecycleLogFileNum(4) .setWalSizeLimitMB(0) .setWalTtlSeconds(0) .setEnablePipelinedWrite(true) @@ -124,25 +82,69 @@ protected void configDBOptions(DBOptionsInterface targetOption) { .setRateLimiter(autoRelease(new RateLimiter(512 * SizeUnit.MB, RateLimiter.DEFAULT_REFILL_PERIOD_MICROS, RateLimiter.DEFAULT_FAIRNESS, - RateLimiter.DEFAULT_MODE, true), targetOption)); - } - - protected void configDBOptions(MutableDBOptionsInterface targetOption) { - targetOption + RateLimiter.DEFAULT_MODE, true), opts)) .setMaxOpenFiles(256) - .setIncreaseParallelism(increaseParallelism) - .setMaxBackgroundJobs(maxBackgroundJobs); - } - - protected void configCFOptions(String name, ColumnFamilyOptionsInterface targetOption) { - targetOption + .setIncreaseParallelism((int) numVal(conf, INCREASE_PARALLELISM)) + .setMaxBackgroundJobs((int) numVal(conf, MAX_BACKGROUND_JOBS)); + // Atomic flush not used in current scenarios + opts.setAtomicFlush(false); + + if (boolVal(conf, ENABLE_STATS)) { + EnumSet ignoreTypes = EnumSet.allOf(HistogramType.class); + ignoreTypes.remove(HistogramType.DB_GET); + ignoreTypes.remove(HistogramType.DB_WRITE); + ignoreTypes.remove(HistogramType.DB_SEEK); + ignoreTypes.remove(HistogramType.SST_READ_MICROS); + ignoreTypes.remove(HistogramType.SST_WRITE_MICROS); + ignoreTypes.remove(HistogramType.BLOB_DB_GET_MICROS); + ignoreTypes.remove(HistogramType.BLOB_DB_WRITE_MICROS); + ignoreTypes.remove(HistogramType.FLUSH_TIME); + ignoreTypes.remove(HistogramType.COMPACTION_TIME); + Statistics statistics = 
new Statistics(ignoreTypes); + String level = strVal(conf, STATS_LEVEL); + statistics.setStatsLevel(StatsLevel.valueOf(level)); + opts.setStatistics(statistics); + } + return opts; + } + + static DBOptions buildCPableDBOption(Struct conf) { + DBOptions dbOptions = buildDBOptions(conf); + dbOptions.setRecycleLogFileNum(0) + .setAllowConcurrentMemtableWrite(true) + .setBytesPerSync(1048576); + return dbOptions; + } + + static DBOptions buildWALableDBOption(Struct conf) { + DBOptions dbOptions = buildDBOptions(conf); + dbOptions.setManualWalFlush(boolVal(conf, ASYNC_WAL_FLUSH)) + .setBytesPerSync(1048576) + .setAllowConcurrentMemtableWrite(true); + return dbOptions; + } + + static ColumnFamilyDescriptor buildCPableCFDesc(String name, Struct conf) { + ColumnFamilyOptions cfOptions = buildCFOptions(conf); + cfOptions.setCompressionType(CompressionType.NO_COMPRESSION); + return new ColumnFamilyDescriptor(name.getBytes(), cfOptions); + } + + static ColumnFamilyDescriptor buildWAlableCFDesc(String name, Struct conf) { + return new ColumnFamilyDescriptor(name.getBytes(), buildCFOptions(conf)); + } + + private static ColumnFamilyOptions buildCFOptions(Struct conf) { + ColumnFamilyOptions cfOptions = new ColumnFamilyOptions(); + cfOptions + // immutable options start .setMergeOperatorName("uint64add") .setTableFormatConfig( new BlockBasedTableConfig() // // Begin to use partitioned index filters // https://github.com/facebook/rocksdb/wiki/Partitioned-Index-Filters#how-to-use-it .setIndexType(IndexType.kTwoLevelIndexSearch) // - .setFilterPolicy(autoRelease(new BloomFilter(16, false), targetOption)) + .setFilterPolicy(autoRelease(new BloomFilter(16, false), cfOptions)) .setPartitionFilters(true) // .setMetadataBlockSize(8 * SizeUnit.KB) // .setCacheIndexAndFilterBlocks(true) // @@ -155,27 +157,24 @@ protected void configCFOptions(String name, ColumnFamilyOptionsInterface targetOption) { - targetOption + .setPrepopulateBlobCache(PrepopulateBlobCache.PREPOPULATE_BLOB_FLUSH_ONLY) 
+ // mutable options start .setCompressionType(CompressionType.LZ4_COMPRESSION) .setBottommostCompressionType(CompressionType.ZSTD_COMPRESSION) // Flushing options: // write_buffer_size sets the size of a single mem_table. Once mem_table exceeds // this size, it is marked immutable and a new one is created. - .setWriteBufferSize(writeBufferSize) + .setWriteBufferSize((long) numVal(conf, WRITE_BUFFER_SIZE)) // Flushing options: // max_write_buffer_number sets the maximum number of mem_tables, both active // and immutable. If the active mem_table fills up and the total number of // mem_tables is larger than max_write_buffer_number we stall further writes. // This may happen if the flush process is slower than the write rate. - .setMaxWriteBufferNumber(maxWriteBufferNumber) + .setMaxWriteBufferNumber((int) numVal(conf, MAX_WRITE_BUFFER_NUMBER)) // Flushing options: // min_write_buffer_number_to_merge is the minimum number of mem_tables to be // merged before flushing to storage. For example, if this option is set to 2, @@ -185,20 +184,20 @@ protected void configCFOptions(String name, MutableColumnFamilyOptionsInterface< // a single key. However, every Get() must traverse all immutable mem_tables // linearly to check if the key is there. Setting this option too high may hurt // read performance. - .setMinWriteBufferNumberToMerge(minWriteBufferNumberToMerge) + .setMinWriteBufferNumberToMerge((int) numVal(conf, MIN_WRITE_BUFFER_NUMBER_TO_MERGE)) // Level Style Compaction: // level0_file_num_compaction_trigger -- Once level 0 reaches this number of // files, L0->L1 compaction is triggered. We can therefore estimate level 0 // size in stable state as // write_buffer_size * min_write_buffer_number_to_merge * level0_file_num_compaction_trigger. 
- .setLevel0FileNumCompactionTrigger(level0FileNumCompactionTrigger) + .setLevel0FileNumCompactionTrigger((int) numVal(conf, LEVEL0_FILE_NUM_COMPACTION_TRIGGER)) // Level Style Compaction: // max_bytes_for_level_base and max_bytes_for_level_multiplier // -- max_bytes_for_level_base is total size of level 1. As mentioned, we // recommend that this be around the size of level 0. Each subsequent level // is max_bytes_for_level_multiplier larger than previous one. The default // is 10 and we do not recommend changing that. - .setMaxBytesForLevelBase(maxBytesForLevelBase) + .setMaxBytesForLevelBase((long) numVal(conf, MAX_BYTES_FOR_LEVEL_BASE)) // Level Style Compaction: // target_file_size_base and target_file_size_multiplier // -- Files in level 1 will have target_file_size_base bytes. Each next @@ -208,7 +207,7 @@ protected void configCFOptions(String name, MutableColumnFamilyOptionsInterface< // number of database files, which is generally a good thing. We recommend setting // target_file_size_base to be max_bytes_for_level_base / 10, so that there are // 10 files in level 1. - .setTargetFileSizeBase(targetFileSizeBase) + .setTargetFileSizeBase((long) numVal(conf, TARGET_FILE_SIZE_BASE)) // If prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0, // create prefix bloom for memtable with the size of // write_buffer_size * memtable_prefix_bloom_size_ratio. @@ -217,58 +216,15 @@ protected void configCFOptions(String name, MutableColumnFamilyOptionsInterface< // Soft limit on number of level-0 files. We start slowing down writes at this // point. A value 0 means that no writing slow down will be triggered by number // of files in level-0. - .setLevel0SlowdownWritesTrigger(level0SlowdownWritesTrigger) + .setLevel0SlowdownWritesTrigger((int) numVal(conf, LEVEL0_SLOWDOWN_WRITES_TRIGGER)) // Maximum number of level-0 files. We stop writes at this point. 
- .setLevel0StopWritesTrigger(level0StopWritesTrigger) + .setLevel0StopWritesTrigger((int) numVal(conf, LEVEL0_STOP_WRITES_TRIGGER)) .setLevelCompactionDynamicLevelBytes(false) // enable blob files .setEnableBlobFiles(true) - .setMinBlobSize(minBlobSize()) + .setPrepopulateBlobCache(PrepopulateBlobCache.PREPOPULATE_BLOB_FLUSH_ONLY) + .setMinBlobSize((long) numVal(conf, MIN_BLOB_SIZE)) .enableBlobGarbageCollection(); - } - - public String dbRootDir() { - return this.dbRootDir; - } - - public boolean heuristicCompaction() { - return this.heuristicCompaction; - } - - public int compactMinTombstoneKeys() { - return this.compactMinTombstoneKeys; - } - - public int compactMinTombstoneRanges() { - return this.compactMinTombstoneRanges; - } - - public double compactTombstoneKeysRatio() { - return this.compactTombstoneKeysRatio; - } - - public T dbRootDir(String dbRootDir) { - this.dbRootDir = dbRootDir; - return (T) this; - } - - public T heuristicCompaction(boolean heuristicCompaction) { - this.heuristicCompaction = heuristicCompaction; - return (T) this; - } - - public T compactMinTombstoneKeys(int compactMinTombstoneKeys) { - this.compactMinTombstoneKeys = compactMinTombstoneKeys; - return (T) this; - } - - public T compactMinTombstoneRanges(int compactMinTombstoneRanges) { - this.compactMinTombstoneRanges = compactMinTombstoneRanges; - return (T) this; - } - - public T compactTombstoneKeysRatio(double compactTombstoneKeysRatio) { - this.compactTombstoneKeysRatio = compactTombstoneKeysRatio; - return (T) this; + return cfOptions; } } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceReader.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBSnapshot.java similarity index 66% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceReader.java rename to 
base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBSnapshot.java index 69e2a6a0a..38b1dc274 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceReader.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBSnapshot.java @@ -17,18 +17,19 @@ * under the License. */ -package org.apache.bifromq.basekv.localengine; +package org.apache.bifromq.basekv.localengine.rocksdb; -import org.apache.bifromq.basekv.proto.Boundary; -import com.google.protobuf.ByteString; -import java.util.Optional; -public interface IKVSpaceReader extends IKVSpaceMetadata { - boolean exist(ByteString key); +import org.rocksdb.Snapshot; - Optional get(ByteString key); +record RocksDBSnapshot(IRocksDBKVSpaceEpoch epoch, Snapshot snapshot) { + static RocksDBSnapshot take(IRocksDBKVSpaceEpoch epoch) { + return new RocksDBSnapshot(epoch, epoch.db().getSnapshot()); + } - IKVSpaceIterator newIterator(); - - IKVSpaceIterator newIterator(Boundary subBoundary); + void release() { + if (snapshot != null) { + epoch.db().releaseSnapshot(snapshot); + } + } } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngine.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngine.java similarity index 58% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngine.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngine.java index 288cbfa40..69b468d4c 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngine.java +++ 
b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngine.java @@ -19,23 +19,41 @@ package org.apache.bifromq.basekv.localengine.rocksdb; +import static org.apache.bifromq.basekv.localengine.StructUtil.strVal; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; + +import com.google.protobuf.Struct; +import java.nio.file.Paths; import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; import org.slf4j.Logger; -public class RocksDBWALableKVEngine - extends RocksDBKVEngine { +class RocksDBWALableKVEngine extends RocksDBKVEngine { - public RocksDBWALableKVEngine(String overrideIdentity, RocksDBWALableKVEngineConfigurator configurator) { - super(overrideIdentity, configurator); + RocksDBWALableKVEngine(String overrideIdentity, Struct conf) { + super(overrideIdentity, conf); } @Override protected RocksDBWALableKVSpace doBuildKVSpace(String spaceId, - RocksDBWALableKVEngineConfigurator configurator, + Struct conf, Runnable onDestroy, KVSpaceOpMeters opMeters, Logger logger, String... 
tags) { - return new RocksDBWALableKVSpace(spaceId, configurator, this, onDestroy, opMeters, logger, tags); + return new RocksDBWALableKVSpace(spaceId, conf, this, onDestroy, opMeters, logger, tags); + } + + @Override + protected Struct defaultConf() { + return RocksDBDefaultConfigs.WAL; + } + + @Override + protected void validateSemantics(Struct conf) { + try { + Paths.get(strVal(conf, DB_ROOT_DIR)); + } catch (Throwable t) { + throw new IllegalArgumentException("Invalid '" + DB_ROOT_DIR + "' path", t); + } } } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVSpace.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVSpace.java similarity index 67% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVSpace.java rename to base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVSpace.java index cb37d7029..11e0fd192 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVSpace.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVSpace.java @@ -19,16 +19,16 @@ package org.apache.bifromq.basekv.localengine.rocksdb; -import org.apache.bifromq.baseenv.EnvProvider; -import org.apache.bifromq.basekv.localengine.IWALableKVSpace; -import org.apache.bifromq.basekv.localengine.KVEngineException; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import org.apache.bifromq.basekv.localengine.rocksdb.metrics.RocksDBKVSpaceMetric; +import static org.apache.bifromq.basekv.localengine.StructUtil.boolVal; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.ASYNC_WAL_FLUSH; +import 
static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.FSYNC_WAL; + +import com.google.protobuf.Struct; import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.Tags; import io.micrometer.core.instrument.Timer; import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics; +import java.nio.file.Files; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; @@ -36,30 +36,35 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; +import org.apache.bifromq.baseenv.EnvProvider; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; +import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; +import org.apache.bifromq.basekv.localengine.IWALableKVSpace; +import org.apache.bifromq.basekv.localengine.KVEngineException; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.localengine.rocksdb.metrics.RocksDBKVSpaceMetric; import org.rocksdb.WriteOptions; import org.slf4j.Logger; -public class RocksDBWALableKVSpace - extends RocksDBKVSpace - implements IWALableKVSpace { - private final RocksDBWALableKVEngineConfigurator configurator; +class RocksDBWALableKVSpace extends RocksDBKVSpace implements IWALableKVSpace { private final WriteOptions writeOptions; private final AtomicReference> flushFutureRef = new AtomicReference<>(); private final ExecutorService flushExecutor; private final MetricManager metricMgr; + private RocksDBWALableKVSpaceEpochHandle handle; - public RocksDBWALableKVSpace(String id, - RocksDBWALableKVEngineConfigurator configurator, - RocksDBWALableKVEngine engine, - Runnable onDestroy, - KVSpaceOpMeters opMeters, - Logger logger, - String... 
tags) { - super(id, configurator, engine, onDestroy, opMeters, logger, tags); - this.configurator = configurator; + RocksDBWALableKVSpace(String id, + Struct conf, + RocksDBWALableKVEngine engine, + Runnable onDestroy, + KVSpaceOpMeters opMeters, + Logger logger, + String... tags) { + super(id, conf, engine, onDestroy, opMeters, logger, tags); writeOptions = new WriteOptions().setDisableWAL(false); - if (!configurator.asyncWALFlush()) { - writeOptions.setSync(configurator.fsyncWAL()); + if (isSyncWALFlush()) { + writeOptions.setSync(isFsyncWAL()); } flushExecutor = ExecutorServiceMetrics.monitor(Metrics.globalRegistry, new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS, @@ -69,6 +74,23 @@ public RocksDBWALableKVSpace(String id, metricMgr = new MetricManager(tags); } + @Override + protected void doOpen() { + try { + // WALable uses space root as DB directory + Files.createDirectories(spaceRootDir().getAbsoluteFile().toPath()); + handle = new RocksDBWALableKVSpaceEpochHandle(id, spaceRootDir(), this.conf, logger, tags); + super.doOpen(); + } catch (Throwable e) { + throw new KVEngineException("Failed to open WALable KVSpace", e); + } + } + + @Override + protected RocksDBWALableKVSpaceEpochHandle handle() { + return handle; + } + @Override protected void doClose() { final CompletableFuture flushTaskFuture = Optional.ofNullable(flushFutureRef.get()).orElseGet(() -> { @@ -84,6 +106,8 @@ protected void doClose() { } writeOptions.close(); metricMgr.close(); + // close handle + handle.close(); super.doClose(); } @@ -97,7 +121,7 @@ public CompletableFuture flush() { if (state() != State.Opening) { return CompletableFuture.failedFuture(new KVEngineException("KVSpace not open")); } - if (!configurator.asyncWALFlush()) { + if (isSyncWALFlush()) { return CompletableFuture.completedFuture(System.nanoTime()); } CompletableFuture flushFuture; @@ -113,6 +137,12 @@ public CompletableFuture flush() { return flushFuture; } + @Override + public IKVSpaceWriter toWriter() { + return 
new RocksDBKVSpaceWriter(id, handle, engine, writeOptions(), syncContext, + writeStats.newRecorder(), this::publishMetadata, opMeters, logger); + } + private void doFlush(CompletableFuture onDone) { flushExecutor.submit(() -> { long flashStartAt = System.nanoTime(); @@ -120,7 +150,7 @@ private void doFlush(CompletableFuture onDone) { logger.trace("KVSpace[{}] flush wal start", id); try { Timer.Sample start = Timer.start(); - db.flushWal(configurator.fsyncWAL()); + handle().db().flushWal(isFsyncWAL()); start.stop(metricMgr.flushTimer); logger.trace("KVSpace[{}] flush complete", id); } catch (Throwable e) { @@ -136,11 +166,25 @@ private void doFlush(CompletableFuture onDone) { }); } + private boolean isSyncWALFlush() { + return !boolVal(conf, ASYNC_WAL_FLUSH); + } + + private boolean isFsyncWAL() { + return boolVal(conf, FSYNC_WAL); + } + + @Override + public IKVSpaceRefreshableReader reader() { + return new RocksDBKVSpaceReader(id, opMeters, logger, syncContext.refresher(), this::handle, + this::currentMetadata, new IteratorOptions(false, 524288)); + } + private class MetricManager { private final Timer flushTimer; MetricManager(String... 
metricTags) { - flushTimer = KVSpaceMeters.getTimer(id, RocksDBKVSpaceMetric.FlushTimer, Tags.of(metricTags)); + flushTimer = KVSpaceMeters.getTimer(id, RocksDBKVSpaceMetric.ManualFlushTimer, Tags.of(metricTags)); } void close() { diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVSpaceEpochHandle.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVSpaceEpochHandle.java new file mode 100644 index 000000000..301050d15 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVSpaceEpochHandle.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.IKVEngine.DEFAULT_NS; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBOptionsUtil.buildWALableDBOption; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBOptionsUtil.buildWAlableCFDesc; + +import com.google.protobuf.Struct; +import io.micrometer.core.instrument.Tags; +import java.io.File; +import org.rocksdb.ColumnFamilyDescriptor; +import org.rocksdb.DBOptions; +import org.slf4j.Logger; + +class RocksDBWALableKVSpaceEpochHandle extends RocksDBKVSpaceEpochHandle { + private final SpaceMetrics metrics; + private final ClosableResources closableResources; + + RocksDBWALableKVSpaceEpochHandle(String id, File dir, Struct conf, Logger logger, Tags tags) { + super(dir, conf, logger); + this.metrics = new SpaceMetrics(id, db, dbOptions, cf, cfDesc.getOptions(), tags.and("gen", "0"), logger); + closableResources = new ClosableResources(id, dir.getName(), dbOptions, cfDesc, cf, db, checkpoint, dir, + (test) -> false, metrics, logger); + } + + @Override + public void close() { + closableResources.run(); + } + + @Override + protected DBOptions buildDBOptions(Struct conf) { + return buildWALableDBOption(conf); + } + + @Override + protected ColumnFamilyDescriptor buildCFDescriptor(Struct conf) { + return buildWAlableCFDesc(DEFAULT_NS, conf); + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/metrics/RocksDBKVSpaceMetric.java b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/metrics/RocksDBKVSpaceMetric.java new file mode 100644 index 000000000..8c3b796ff --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/metrics/RocksDBKVSpaceMetric.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb.metrics; + +import io.micrometer.core.instrument.Meter; +import org.apache.bifromq.basekv.localengine.metrics.IKVSpaceMetric; + +/** + * RocksDB specific metrics for KVSpace. + */ +public enum RocksDBKVSpaceMetric implements IKVSpaceMetric { + BlockCache("basekv.le.rocksdb.mem.blockcache", Meter.Type.GAUGE), + TableReader("basekv.le.rocksdb.mem.tablereader", Meter.Type.GAUGE), + MemTable("basekv.le.rocksdb.mem.memtable", Meter.Type.GAUGE), + PinnedMem("basekv.le.rocksdb.mem.pinned", Meter.Type.GAUGE), + // DB state and capacity gauges + StateTotalSSTSize("basekv.le.rocksdb.state.totalsstsize", Meter.Type.GAUGE), + StateLiveSSTSize("basekv.le.rocksdb.state.livesstsize", Meter.Type.GAUGE), + StateLiveDataSize("basekv.le.rocksdb.state.livedatasize", Meter.Type.GAUGE), + StateEstimateNumKeys("basekv.le.rocksdb.state.keys.est", Meter.Type.GAUGE), + StatePendingCompactionBytes("basekv.le.rocksdb.state.pending.compaction.bytes", Meter.Type.GAUGE), + StateRunningCompactions("basekv.le.rocksdb.state.running.compactions", Meter.Type.GAUGE), + StateRunningFlushes("basekv.le.rocksdb.state.running.flushes", Meter.Type.GAUGE), + StateCompactionPending("basekv.le.rocksdb.state.pending.compactions", 
Meter.Type.GAUGE), + StateMemTableFlushPending("basekv.le.rocksdb.state.pending.flushes", Meter.Type.GAUGE), + StateBackgroundErrors("basekv.le.rocksdb.state.bg.errors", Meter.Type.GAUGE), + // IO and cache efficiency + IOBytesReadCounter("basekv.le.rocksdb.io.read.bytes", Meter.Type.COUNTER, true), + IOBytesWrittenCounter("basekv.le.rocksdb.io.write.bytes", Meter.Type.COUNTER, true), + BlockCacheHitCounter("basekv.le.rocksdb.block.cache.hit", Meter.Type.COUNTER, true), + BlockCacheMissCounter("basekv.le.rocksdb.block.cache.miss", Meter.Type.COUNTER, true), + BlobDBCacheHitCounter("basekv.le.rocksdb.blob.cache.hit", Meter.Type.COUNTER, true), + BlobDBCacheMissCounter("basekv.le.rocksdb.blob.cache.miss", Meter.Type.COUNTER, true), + BloomUsefulCounter("basekv.le.rocksdb.bloom.useful.count", Meter.Type.COUNTER, true), + CheckpointTimer("basekv.le.rocksdb.checkpoint.time", Meter.Type.TIMER), + TotalKeysGauge("basekv.le.rocksdb.compaction.keys", Meter.Type.GAUGE), + TotalTombstoneKeysGauge("basekv.le.rocksdb.compaction.delkeys", Meter.Type.GAUGE), + TotalTombstoneRangesGauge("basekv.le.rocksdb.compaction.delranges", Meter.Type.GAUGE), + ManualCompactionCounter("basekv.le.rocksdb.manual.compaction.count", Meter.Type.COUNTER), + ManualCompactionTimer("basekv.le.rocksdb.manual.compaction.time", Meter.Type.TIMER), + ManualFlushTimer("basekv.le.rocksdb.manual.flush.time", Meter.Type.TIMER), + // Histogram exposure: counters and gauges + GetLatency("basekv.le.rocksdb.latency.get.time", Meter.Type.TIMER, true), + WriteLatency("basekv.le.rocksdb.latency.write.time", Meter.Type.TIMER, true), + SeekLatency("basekv.le.rocksdb.latency.seek.time", Meter.Type.TIMER, true), + BlobGetLatency("basekv.le.rocksdb.latency.blob.get.time", Meter.Type.TIMER, true), + BlobWriteLatency("basekv.le.rocksdb.latency.blob.write.time", Meter.Type.TIMER, true), + SSTReadLatency("basekv.le.rocksdb.latency.sstread.time", Meter.Type.TIMER, true), + 
SSTWriteLatency("basekv.le.rocksdb.latency.sstwrite.time", Meter.Type.TIMER, true), + FlushLatency("basekv.le.rocksdb.latency.flush.time", Meter.Type.TIMER, true), + CompactionLatency("basekv.le.rocksdb.latency.compaction.time", Meter.Type.TIMER, true), + WriteStall("basekv.le.rocksdb.write.stall", Meter.Type.TIMER, true); + + private final String metricName; + private final Meter.Type meterType; + private final boolean isFunction; + + RocksDBKVSpaceMetric(String metricName, Meter.Type meterType) { + this(metricName, meterType, false); + } + + RocksDBKVSpaceMetric(String metricName, Meter.Type meterType, boolean isFunction) { + this.metricName = metricName; + this.meterType = meterType; + this.isFunction = isFunction; + } + + @Override + public String metricName() { + return metricName; + } + + @Override + public Meter.Type meterType() { + return meterType; + } + + @Override + public boolean isFunction() { + return isFunction; + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/main/resources/META-INF/services/org.apache.bifromq.basekv.localengine.spi.IKVEngineProvider b/base-kv/base-kv-local-engine-rocksdb/src/main/resources/META-INF/services/org.apache.bifromq.basekv.localengine.spi.IKVEngineProvider new file mode 100644 index 000000000..a2c964ff1 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/main/resources/META-INF/services/org.apache.bifromq.basekv.localengine.spi.IKVEngineProvider @@ -0,0 +1 @@ +org.apache.bifromq.basekv.localengine.rocksdb.RocksDBKVEngineProvider diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/BenchmarkState.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/BenchmarkState.java similarity index 77% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/BenchmarkState.java rename to 
base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/BenchmarkState.java index e15796d8c..90d9c84e3 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/BenchmarkState.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/BenchmarkState.java @@ -14,15 +14,16 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.localengine.benchmark; -import org.apache.bifromq.basekv.localengine.ICPableKVSpace; -import org.apache.bifromq.basekv.localengine.IKVEngine; -import org.apache.bifromq.basekv.localengine.KVEngineFactory; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; + +import com.google.protobuf.Struct; +import com.google.protobuf.Value; import java.io.File; import java.io.IOException; import java.nio.file.Files; @@ -31,6 +32,9 @@ import java.util.Comparator; import java.util.UUID; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVEngine; +import org.apache.bifromq.basekv.localengine.KVEngineFactory; import org.openjdk.jmh.annotations.Level; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.TearDown; @@ -49,11 +53,13 @@ abstract class BenchmarkState { String DB_NAME = "testDB"; String DB_CHECKPOINT_DIR = "testDB_cp"; String uuid = UUID.randomUUID().toString(); - RocksDBCPableKVEngineConfigurator configurator = RocksDBCPableKVEngineConfigurator.builder() - 
.dbCheckpointRootDir(Paths.get(dbRootDir.toString(), uuid, DB_CHECKPOINT_DIR).toString()) - .dbRootDir(Paths.get(dbRootDir.toString(), uuid, DB_NAME).toString()) + Struct conf = Struct.newBuilder() + .putFields(DB_ROOT_DIR, + Value.newBuilder().setStringValue(Paths.get(dbRootDir.toString(), uuid, DB_NAME).toString()).build()) + .putFields(DB_CHECKPOINT_ROOT_DIR, + Value.newBuilder().setStringValue(Paths.get(dbRootDir.toString(), uuid, DB_CHECKPOINT_DIR).toString()).build()) .build(); - kvEngine = KVEngineFactory.createCPable(null, configurator); + kvEngine = KVEngineFactory.createCPable(null, "rocksdb", conf); } @Setup(Level.Trial) diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/BenchmarkTemplate.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/BenchmarkTemplate.java similarity index 80% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/BenchmarkTemplate.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/BenchmarkTemplate.java index aaf84c3f2..2ddfb7451 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/BenchmarkTemplate.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/BenchmarkTemplate.java @@ -19,10 +19,11 @@ package org.apache.bifromq.basekv.localengine.benchmark; -import org.apache.bifromq.basekv.localengine.ICPableKVSpace; -import org.apache.bifromq.basekv.localengine.IKVEngine; -import org.apache.bifromq.basekv.localengine.KVEngineFactory; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static 
org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; + +import com.google.protobuf.Struct; +import com.google.protobuf.Value; import java.io.File; import java.io.IOException; import java.nio.file.Files; @@ -31,6 +32,9 @@ import java.util.Comparator; import java.util.UUID; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVEngine; +import org.apache.bifromq.basekv.localengine.KVEngineFactory; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.runner.Runner; @@ -54,11 +58,13 @@ public void setup() { String DB_NAME = "testDB"; String DB_CHECKPOINT_DIR = "testDB_cp"; String uuid = UUID.randomUUID().toString(); - RocksDBCPableKVEngineConfigurator configurator = RocksDBCPableKVEngineConfigurator.builder() - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), uuid, DB_CHECKPOINT_DIR).toString()) - .dbRootDir(Paths.get(dbRootDir.toString(), uuid, DB_NAME).toString()) + Struct conf = Struct.newBuilder() + .putFields(DB_ROOT_DIR, + Value.newBuilder().setStringValue(Paths.get(dbRootDir.toString(), uuid, DB_NAME).toString()).build()) + .putFields(DB_CHECKPOINT_ROOT_DIR, + Value.newBuilder().setStringValue(Paths.get(dbRootDir.toString(), uuid, DB_CHECKPOINT_DIR).toString()).build()) .build(); - kvEngine = KVEngineFactory.createCPable(null, configurator); + kvEngine = KVEngineFactory.createCPable(null, "rocksdb", conf); kvEngine.start(); doSetup(); } diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeyDeleteAndGet_Benchmark.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeyDeleteAndGet_Benchmark.java similarity index 90% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeyDeleteAndGet_Benchmark.java rename 
to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeyDeleteAndGet_Benchmark.java index 36c82a577..3876dd99a 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeyDeleteAndGet_Benchmark.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeyDeleteAndGet_Benchmark.java @@ -22,15 +22,16 @@ import static org.apache.bifromq.basekv.localengine.TestUtil.toByteString; -import org.apache.bifromq.basekv.localengine.IKVSpace; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; -import org.apache.bifromq.basekv.proto.Boundary; import com.google.protobuf.ByteString; import java.util.Optional; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; +import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; +import org.apache.bifromq.basekv.proto.Boundary; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Group; @@ -47,24 +48,16 @@ public class ContinuousKeyDeleteAndGet_Benchmark extends BenchmarkTemplate { private static int keyCount = 1000000; private ByteString key = ByteString.copyFromUtf8("key"); - private IKVSpace kvSpace; + private ICPableKVSpace kvSpace; + private IKVSpaceRefreshableReader reader; private IKVSpaceIterator itr; private String rangeId = "testRange"; - @State(Scope.Thread) - public static class BenchmarkThreadState { - volatile int i = ThreadLocalRandom.current().nextInt(keyCount); - - @Setup(Level.Invocation) - public void inc() { - i = 
ThreadLocalRandom.current().nextInt(keyCount); - } - } - @Override protected void doSetup() { kvSpace = kvEngine.createIfMissing(rangeId); - itr = kvSpace.newIterator(); + reader = kvSpace.reader(); + itr = reader.newIterator(); IKVSpaceWriter writer = kvSpace.toWriter(); for (int i = 0; i < keyCount; i++) { writer.put(key.concat(toByteString(i)), ByteString.EMPTY); @@ -88,6 +81,16 @@ protected void doSetup() { @BenchmarkMode(Mode.Throughput) @OutputTimeUnit(TimeUnit.SECONDS) public Optional get(BenchmarkThreadState state) { - return kvSpace.get(key.concat(toByteString(state.i))); + return reader.get(key.concat(toByteString(state.i))); + } + + @State(Scope.Thread) + public static class BenchmarkThreadState { + volatile int i = ThreadLocalRandom.current().nextInt(keyCount); + + @Setup(Level.Invocation) + public void inc() { + i = ThreadLocalRandom.current().nextInt(keyCount); + } } } diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeyDeleteAndSeek_Benchmark.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeyDeleteAndSeek_Benchmark.java similarity index 91% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeyDeleteAndSeek_Benchmark.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeyDeleteAndSeek_Benchmark.java index dfd1123ea..26a83c83d 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeyDeleteAndSeek_Benchmark.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeyDeleteAndSeek_Benchmark.java @@ -22,14 +22,15 @@ import static org.apache.bifromq.basekv.localengine.TestUtil.toByteString; -import org.apache.bifromq.basekv.localengine.IKVSpace; -import 
org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; -import org.apache.bifromq.basekv.proto.Boundary; import com.google.protobuf.ByteString; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; +import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; +import org.apache.bifromq.basekv.proto.Boundary; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Group; @@ -46,26 +47,18 @@ public class ContinuousKeyDeleteAndSeek_Benchmark extends BenchmarkTemplate { private static int keyCount = 1000000; private static ByteString key = ByteString.copyFromUtf8("key"); - private IKVSpace kvSpace; + private ICPableKVSpace kvSpace; + private IKVSpaceRefreshableReader reader; private IKVSpaceIterator itr; private String rangeId = "testRange"; // IKVEngineIterator itr; - @State(Scope.Thread) - public static class BenchmarkThreadState { - volatile int i = ThreadLocalRandom.current().nextInt(keyCount); - - @Setup(Level.Invocation) - public void inc() { - i = ThreadLocalRandom.current().nextInt(keyCount); - } - } - @Override protected void doSetup() { kvSpace = kvEngine.createIfMissing(rangeId); - itr = kvSpace.newIterator(); + reader = kvSpace.reader(); + itr = reader.newIterator(); IKVSpaceWriter writer = kvSpace.toWriter(); for (int i = 0; i < keyCount; i++) { writer.insert(key.concat(toByteString(i)), ByteString.copyFromUtf8("val" + i)); @@ -77,22 +70,13 @@ protected void doSetup() { writer.done(); } -// @Benchmark -// @Group("Seek") -// @GroupThreads(1) -// @BenchmarkMode(Mode.Throughput) -// @OutputTimeUnit(TimeUnit.SECONDS) -// public void delete(BenchmarkThreadState 
state) { -// kvEngine.deleteRange(DEFAULT_NS, key.concat(toByteString(state.i)), key.concat(toByteString(state.i + 1))); -// } - @Benchmark @Group("Seek") @GroupThreads(1) @BenchmarkMode(Mode.Throughput) @OutputTimeUnit(TimeUnit.SECONDS) public boolean iterator(BenchmarkThreadState state) { - IKVSpaceIterator itr = kvSpace.newIterator(Boundary.newBuilder() + IKVSpaceIterator itr = reader.newIterator(Boundary.newBuilder() .setStartKey(key.concat(toByteString(state.i))) .setEndKey(key.concat(toByteString(state.i + 1))) .build()); @@ -101,4 +85,23 @@ public boolean iterator(BenchmarkThreadState state) { itr.seek(key.concat(toByteString(state.i))); return itr.isValid(); } + +// @Benchmark +// @Group("Seek") +// @GroupThreads(1) +// @BenchmarkMode(Mode.Throughput) +// @OutputTimeUnit(TimeUnit.SECONDS) +// public void delete(BenchmarkThreadState state) { +// kvEngine.deleteRange(DEFAULT_NS, key.concat(toByteString(state.i)), key.concat(toByteString(state.i + 1))); +// } + + @State(Scope.Thread) + public static class BenchmarkThreadState { + volatile int i = ThreadLocalRandom.current().nextInt(keyCount); + + @Setup(Level.Invocation) + public void inc() { + i = ThreadLocalRandom.current().nextInt(keyCount); + } + } } diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeySingleDeleteAndSeek.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeySingleDeleteAndSeek.java similarity index 97% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeySingleDeleteAndSeek.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeySingleDeleteAndSeek.java index 2594729cd..7aa89bd66 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeySingleDeleteAndSeek.java +++ 
b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeySingleDeleteAndSeek.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.localengine.benchmark; @@ -56,7 +56,7 @@ public void iterator(ContinuousKeySingleDeleteAndSeekState state, Blackhole bl) // IKVEngineIterator itr = kvEngine.newIterator(DEFAULT_NS, key.concat(toByteString(state.i)), // key.concat(toByteString(state.i + 1))); // try (IKVEngineIterator itr = state.kvEngine.newIterator(DEFAULT_NS)) { - state.itr.refresh(); + state.reader.refresh(); // state.itr.seekToLast(); state.itr.seek(state.key.concat(toByteString(ThreadLocalRandom.current().nextInt(state.keyCount)))); bl.consume(state.itr.isValid()); diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeySingleDeleteAndSeekState.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeySingleDeleteAndSeekState.java similarity index 86% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeySingleDeleteAndSeekState.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeySingleDeleteAndSeekState.java index 27ab0b4ed..0e421d4cf 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeySingleDeleteAndSeekState.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/ContinuousKeySingleDeleteAndSeekState.java @@ -22,10 +22,11 @@ import static org.apache.bifromq.basekv.localengine.TestUtil.toByteString; -import 
org.apache.bifromq.basekv.localengine.IKVSpace; +import com.google.protobuf.ByteString; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; -import com.google.protobuf.ByteString; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.State; @@ -33,14 +34,16 @@ public class ContinuousKeySingleDeleteAndSeekState extends BenchmarkState { int keyCount = 1000000; ByteString key = ByteString.copyFromUtf8("key"); - private IKVSpace kvSpace; + IKVSpaceRefreshableReader reader; IKVSpaceIterator itr; + private ICPableKVSpace kvSpace; private String rangeId = "testRange"; @Override protected void afterSetup() { kvSpace = kvEngine.createIfMissing(rangeId); - itr = kvSpace.newIterator(); + reader = kvSpace.reader(); + itr = reader.newIterator(); IKVSpaceWriter writer = kvSpace.toWriter(); for (int i = 0; i < keyCount; i++) { @@ -49,7 +52,7 @@ protected void afterSetup() { } writer.put(key.concat(toByteString(keyCount)), ByteString.EMPTY); writer.done(); - itr = kvSpace.newIterator(); + itr = reader.newIterator(); } @Override diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/HybridWorkload.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/HybridWorkload.java similarity index 100% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/HybridWorkload.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/HybridWorkload.java diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/HybridWorkloadState.java 
b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/HybridWorkloadState.java similarity index 85% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/HybridWorkloadState.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/HybridWorkloadState.java index 36e6a53ea..a0d9fb615 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/HybridWorkloadState.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/HybridWorkloadState.java @@ -19,29 +19,31 @@ package org.apache.bifromq.basekv.localengine.benchmark; -import org.apache.bifromq.basekv.localengine.IKVSpace; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; import com.google.protobuf.ByteString; import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ThreadLocalRandom; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.localengine.IKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; +import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.State; @Slf4j @State(Scope.Benchmark) public class HybridWorkloadState extends BenchmarkState { + ConcurrentHashMap itrMap = new ConcurrentHashMap<>(); private String rangeId = "testRange"; private IKVSpace kvSpace; + private IKVSpaceRefreshableReader reader; private IKVSpaceWriter writer; - ConcurrentHashMap itrMap = new ConcurrentHashMap<>(); - @Override protected void afterSetup() { kvSpace = kvEngine.createIfMissing(rangeId); + reader = kvSpace.reader(); // itr = kvEngine.newIterator(rangeId); } @@ -68,24 
+70,26 @@ public void randomPutAndDelete() { } public Optional randomGet() { - return kvSpace.get(randomBS()); + return reader.get(randomBS()); } public boolean randomExist() { - return kvSpace.exist(randomBS()); + return reader.exist(randomBS()); } public void seekToFirst() { IKVSpaceIterator itr = itrMap.computeIfAbsent(Thread.currentThread().getId(), - k -> kvSpace.newIterator()); - itr.refresh(); + k -> reader.newIterator()); + reader.refresh(); + itr = itrMap.get(Thread.currentThread().getId()); itr.seekToFirst(); } public void randomSeek() { IKVSpaceIterator itr = itrMap.computeIfAbsent(Thread.currentThread().getId(), - k -> kvSpace.newIterator()); - itr.refresh(); + k -> reader.newIterator()); + reader.refresh(); + itr = itrMap.get(Thread.currentThread().getId()); itr.seek(randomBS()); } diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/SingleKeyUpdateAndGet.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/SingleKeyUpdateAndGet.java similarity index 92% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/SingleKeyUpdateAndGet.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/SingleKeyUpdateAndGet.java index 5bdf7f398..60f2c49bf 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/SingleKeyUpdateAndGet.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/SingleKeyUpdateAndGet.java @@ -26,6 +26,7 @@ import java.util.concurrent.TimeUnit; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Group; @@ -56,7 +57,9 @@ public 
static void main(String[] args) { @BenchmarkMode(Mode.Throughput) @OutputTimeUnit(TimeUnit.SECONDS) public void get(SingleKeyUpdateAndGetState state, Blackhole blackhole) { - blackhole.consume(state.kvSpace.get(state.key).get()); + try (IKVSpaceRefreshableReader reader = state.kvSpace.reader()) { + blackhole.consume(reader.get(state.key).get()); + } } @Benchmark diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/SingleKeyUpdateAndGetState.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/SingleKeyUpdateAndGetState.java similarity index 94% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/SingleKeyUpdateAndGetState.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/SingleKeyUpdateAndGetState.java index 2dfd42d88..bdd06d90b 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/SingleKeyUpdateAndGetState.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/benchmark/SingleKeyUpdateAndGetState.java @@ -22,9 +22,9 @@ import static org.apache.bifromq.basekv.localengine.TestUtil.toByteStringNativeOrder; -import org.apache.bifromq.basekv.localengine.IKVSpace; import com.google.protobuf.ByteString; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.State; @@ -32,9 +32,8 @@ @State(Scope.Group) public class SingleKeyUpdateAndGetState extends BenchmarkState { ByteString key = ByteString.copyFromUtf8("key"); + ICPableKVSpace kvSpace; private String rangeId = "testRange"; - IKVSpace kvSpace; - @Override protected void afterSetup() { diff --git 
a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMetersTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMetersTest.java new file mode 100644 index 000000000..ed816d3c5 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMetersTest.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.metrics; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import io.micrometer.core.instrument.Counter; +import io.micrometer.core.instrument.FunctionCounter; +import io.micrometer.core.instrument.FunctionTimer; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Metrics; +import io.micrometer.core.instrument.Tags; +import io.micrometer.core.instrument.composite.CompositeMeterRegistry; +import io.micrometer.core.instrument.simple.SimpleMeterRegistry; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import org.apache.bifromq.basekv.localengine.rocksdb.metrics.RocksDBKVSpaceMetric; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class KVSpaceMetersTest { + private SimpleMeterRegistry registry; + private List savedRegistries; + private String id; + private Tags baseTags; + + @BeforeMethod + public void setUp() { + registry = new SimpleMeterRegistry(); + savedRegistries = new ArrayList<>(); + CompositeMeterRegistry composite = Metrics.globalRegistry; + savedRegistries.addAll(composite.getRegistries()); + for (MeterRegistry r : savedRegistries) { + Metrics.removeRegistry(r); + } + Metrics.addRegistry(registry); + id = "kv1"; + baseTags = Tags.of("env", "test"); + } + + @AfterMethod + public void tearDown() { + Metrics.removeRegistry(registry); + registry.close(); + for (MeterRegistry r : savedRegistries) { + Metrics.addRegistry(r); + } + } + + @Test + public void counterIncrementAndUnregister() { + Counter c = KVSpaceMeters.getCounter(id, RocksDBKVSpaceMetric.ManualCompactionCounter, baseTags); + c.increment(2.0); + c.increment(3.0); + assertEquals(c.count(), 5.0, 0.0001); + + assertFalse(registry + 
.find(RocksDBKVSpaceMetric.ManualCompactionCounter.metricName()) + .tags("env", "test", "kvspace", id) + .meters().isEmpty()); + + c.close(); + assertTrue(registry + .find(RocksDBKVSpaceMetric.ManualCompactionCounter.metricName()) + .tags("env", "test", "kvspace", id) + .meters().isEmpty()); + } + + @Test + public void functionCounterBehaviorAndUnregister() { + class Holder { + double v; + } + Holder h = new Holder(); + h.v = 7.0; + + FunctionCounter fc = KVSpaceMeters.getFunctionCounter( + id, RocksDBKVSpaceMetric.IOBytesReadCounter, h, x -> x.v, baseTags); + + assertEquals(fc.count(), 7.0, 0.0001); + h.v = 9.0; + assertEquals(fc.count(), 9.0, 0.0001); + + assertFalse(registry + .find(RocksDBKVSpaceMetric.IOBytesReadCounter.metricName()) + .tags("env", "test", "kvspace", id) + .meters().isEmpty()); + + fc.close(); + assertTrue(registry + .find(RocksDBKVSpaceMetric.IOBytesReadCounter.metricName()) + .tags("env", "test", "kvspace", id) + .meters().isEmpty()); + } + + @Test + public void functionTimerBehaviorAndUnregister() { + class T { + long c; + double t; + } + T h = new T(); + h.c = 2L; + h.t = 1.5; + + FunctionTimer ft = KVSpaceMeters.getFunctionTimer( + id, RocksDBKVSpaceMetric.GetLatency, h, x -> x.c, x -> x.t, TimeUnit.SECONDS, baseTags); + + assertEquals(ft.count(), 2.0, 0.0001); + assertEquals(ft.totalTime(TimeUnit.SECONDS), 1.5, 0.0001); + + h.c = 3L; + h.t = 2.5; + assertEquals(ft.count(), 3.0, 0.0001); + assertEquals(ft.totalTime(TimeUnit.SECONDS), 2.5, 0.0001); + + assertFalse(registry + .find(RocksDBKVSpaceMetric.GetLatency.metricName()) + .tags("env", "test", "kvspace", id) + .meters().isEmpty()); + + ft.close(); + assertTrue(registry + .find(RocksDBKVSpaceMetric.GetLatency.metricName()) + .tags("env", "test", "kvspace", id) + .meters().isEmpty()); + } +} diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRawRocksDBTest.java 
b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRawRocksDBTest.java similarity index 98% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRawRocksDBTest.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRawRocksDBTest.java index c60398a3f..bdf3d15d2 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRawRocksDBTest.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRawRocksDBTest.java @@ -14,17 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.localengine.rocksdb; -import org.apache.bifromq.basekv.localengine.MockableTest; -import org.apache.bifromq.basekv.localengine.TestUtil; import java.lang.reflect.Method; import java.nio.file.Files; import java.nio.file.Path; import lombok.SneakyThrows; +import org.apache.bifromq.basekv.localengine.MockableTest; +import org.apache.bifromq.basekv.localengine.TestUtil; import org.mockito.Mockito; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.Options; @@ -35,10 +35,10 @@ abstract class AbstractRawRocksDBTest extends MockableTest { RocksDB.loadLibrary(); } - private Path dbRootDir; - private Options options; protected RocksDB db; protected ColumnFamilyHandle cfHandle; + private Path dbRootDir; + private Options options; @SneakyThrows @Override diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRocksDBKVEngineTest.java 
b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRocksDBCPableEngineTest.java similarity index 71% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRocksDBKVEngineTest.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRocksDBCPableEngineTest.java index 0de42463c..e5877a3ac 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRocksDBKVEngineTest.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRocksDBCPableEngineTest.java @@ -25,17 +25,19 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.localengine.AbstractKVEngineTest; -import org.apache.bifromq.basekv.localengine.IKVSpace; -import org.apache.bifromq.basekv.localengine.TestUtil; import com.google.protobuf.ByteString; import java.nio.file.Files; import java.nio.file.Path; import java.util.concurrent.atomic.AtomicBoolean; import lombok.SneakyThrows; +import org.apache.bifromq.basekv.localengine.AbstractCPableEngineTest; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; +import org.apache.bifromq.basekv.localengine.TestUtil; import org.testng.annotations.Test; -public abstract class AbstractRocksDBKVEngineTest extends AbstractKVEngineTest { +public abstract class AbstractRocksDBCPableEngineTest extends AbstractCPableEngineTest { protected Path dbRootDir; @SneakyThrows @@ -66,12 +68,14 @@ public void loadExistingKeyRange() { ByteString metaValue = ByteString.copyFromUtf8("metaValue"); ByteString key = ByteString.copyFromUtf8("key"); ByteString value = ByteString.copyFromUtf8("value"); - IKVSpace keyRange = 
engine.createIfMissing(rangeId); + ICPableKVSpace keyRange = engine.createIfMissing(rangeId); keyRange.toWriter().put(key, value).metadata(metaKey, metaValue).done(); - assertTrue(keyRange.metadata(metaKey).isPresent()); - assertTrue(keyRange.metadata().blockingFirst().containsKey(metaKey)); - assertTrue(keyRange.exist(key)); - assertEquals(keyRange.get(key).get(), value); + try (IKVSpaceReader reader = keyRange.reader()) { // use reader for read APIs + assertTrue(reader.metadata(metaKey).isPresent()); + assertTrue(keyRange.metadata().blockingFirst().containsKey(metaKey)); + assertTrue(reader.exist(key)); + assertEquals(reader.get(key).get(), value); + } engine.stop(); engine = newEngine(); @@ -79,10 +83,12 @@ public void loadExistingKeyRange() { assertEquals(engine.spaces().size(), 1); IKVSpace keyRangeLoaded = engine.spaces().values().stream().findFirst().get(); assertEquals(keyRangeLoaded.id(), rangeId); - assertTrue(keyRangeLoaded.metadata(metaKey).isPresent()); - assertTrue(keyRangeLoaded.metadata().blockingFirst().containsKey(metaKey)); - assertTrue(keyRangeLoaded.exist(key)); - assertEquals(keyRangeLoaded.get(key).get(), value); + try (IKVSpaceReader reader = keyRangeLoaded.reader()) { + assertTrue(reader.metadata(metaKey).isPresent()); + assertTrue(keyRangeLoaded.metadata().blockingFirst().containsKey(metaKey)); + assertTrue(reader.exist(key)); + assertEquals(reader.get(key).get(), value); + } // stop again and start engine.stop(); @@ -91,10 +97,12 @@ public void loadExistingKeyRange() { assertEquals(engine.spaces().size(), 1); keyRangeLoaded = engine.spaces().values().stream().findFirst().get(); assertEquals(keyRangeLoaded.id(), rangeId); - assertTrue(keyRangeLoaded.metadata(metaKey).isPresent()); - assertTrue(keyRangeLoaded.metadata().blockingFirst().containsKey(metaKey)); - assertTrue(keyRangeLoaded.exist(key)); - assertEquals(keyRangeLoaded.get(key).get(), value); + try (IKVSpaceReader reader = keyRangeLoaded.reader()) { + 
assertTrue(reader.metadata(metaKey).isPresent()); + assertTrue(keyRangeLoaded.metadata().blockingFirst().containsKey(metaKey)); + assertTrue(reader.exist(key)); + assertEquals(reader.get(key).get(), value); + } } @Test @@ -102,13 +110,15 @@ public void flushOnClose() { String rangeId = "test_range1"; ByteString key = ByteString.copyFromUtf8("key"); ByteString value = ByteString.copyFromUtf8("value"); - IKVSpace keyRange = engine.createIfMissing(rangeId); + ICPableKVSpace keyRange = engine.createIfMissing(rangeId); keyRange.toWriter().put(key, value).done(); engine.stop(); engine = newEngine(); engine.start(); keyRange = engine.createIfMissing(rangeId); - assertTrue(keyRange.exist(key)); + try (IKVSpaceReader reader = keyRange.reader()) { + assertTrue(reader.exist(key)); + } } @Test diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRocksDBWALableEngineTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRocksDBWALableEngineTest.java new file mode 100644 index 000000000..f402a1db6 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AbstractRocksDBWALableEngineTest.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import java.nio.file.Files; +import java.nio.file.Path; +import lombok.SneakyThrows; +import org.apache.bifromq.basekv.localengine.AbstractWALableEngineTest; +import org.apache.bifromq.basekv.localengine.IKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; +import org.apache.bifromq.basekv.localengine.IWALableKVSpace; +import org.apache.bifromq.basekv.localengine.TestUtil; +import org.testng.annotations.Test; + +public abstract class AbstractRocksDBWALableEngineTest extends AbstractWALableEngineTest { + protected Path dbRootDir; + + @SneakyThrows + @Override + protected void beforeStart() { + dbRootDir = Files.createTempDirectory(""); + } + + + @Override + protected void afterStop() { + TestUtil.deleteDir(dbRootDir.toString()); + } + + @Test + public void identityKeptSame() { + String identity = engine.id(); + engine.stop(); + engine = newEngine(); + engine.start(); + assertEquals(identity, engine.id()); + } + + @Test + public void loadExistingKeyRange() { + String rangeId = "test_range1"; + ByteString metaKey = ByteString.copyFromUtf8("metaKey"); + ByteString metaValue = ByteString.copyFromUtf8("metaValue"); + ByteString key = ByteString.copyFromUtf8("key"); + ByteString value = ByteString.copyFromUtf8("value"); + IWALableKVSpace keyRange = engine.createIfMissing(rangeId); + keyRange.toWriter().put(key, value).metadata(metaKey, 
metaValue).done(); + try (IKVSpaceReader reader = keyRange.reader()) { // use reader for read APIs + assertTrue(reader.metadata(metaKey).isPresent()); + assertTrue(keyRange.metadata().blockingFirst().containsKey(metaKey)); + assertTrue(reader.exist(key)); + assertEquals(reader.get(key).get(), value); + } + engine.stop(); + + engine = newEngine(); + engine.start(); + assertEquals(engine.spaces().size(), 1); + IKVSpace keyRangeLoaded = engine.spaces().values().stream().findFirst().get(); + assertEquals(keyRangeLoaded.id(), rangeId); + try (IKVSpaceReader reader = keyRangeLoaded.reader()) { + assertTrue(reader.metadata(metaKey).isPresent()); + assertTrue(keyRangeLoaded.metadata().blockingFirst().containsKey(metaKey)); + assertTrue(reader.exist(key)); + assertEquals(reader.get(key).get(), value); + } + // stop again and start + engine.stop(); + + engine = newEngine(); + engine.start(); + assertEquals(engine.spaces().size(), 1); + keyRangeLoaded = engine.spaces().values().stream().findFirst().get(); + assertEquals(keyRangeLoaded.id(), rangeId); + try (IKVSpaceReader reader = keyRangeLoaded.reader()) { + assertTrue(reader.metadata(metaKey).isPresent()); + assertTrue(keyRangeLoaded.metadata().blockingFirst().containsKey(metaKey)); + assertTrue(reader.exist(key)); + assertEquals(reader.get(key).get(), value); + } + } + + @Test + public void flushOnClose() { + String rangeId = "test_range1"; + ByteString key = ByteString.copyFromUtf8("key"); + ByteString value = ByteString.copyFromUtf8("value"); + IWALableKVSpace keyRange = engine.createIfMissing(rangeId); + keyRange.toWriter().put(key, value).done(); + engine.stop(); + engine = newEngine(); + engine.start(); + keyRange = engine.createIfMissing(rangeId); + try (IKVSpaceReader reader = keyRange.reader()) { + assertTrue(reader.exist(key)); + } + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AdaptiveWriteBudgetTest.java 
b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AdaptiveWriteBudgetTest.java new file mode 100644 index 000000000..35dc385fe --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/AdaptiveWriteBudgetTest.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import org.testng.annotations.Test; + +public class AdaptiveWriteBudgetTest { + + @Test + public void initialLimitsPositive() { + AdaptiveWriteBudget b = new AdaptiveWriteBudget(); + assertTrue(b.currentEntryLimit() > 0); + assertTrue(b.currentByteLimit() > 0); + assertFalse(b.shouldFlush(0, 0)); + assertTrue(b.shouldFlush(b.currentEntryLimit(), 0)); + assertTrue(b.shouldFlush(0, b.currentByteLimit())); + } + + @Test + public void fastRoundsIncreaseBudgets() { + AdaptiveWriteBudget b = new AdaptiveWriteBudget(); + long e1 = b.currentEntryLimit(); + long by1 = b.currentByteLimit(); + for (int i = 0; i < 3; i++) { + b.recordFlush(e1, by1, 10); // fast + } + assertTrue(b.currentEntryLimit() >= e1); + assertTrue(b.currentByteLimit() >= by1); + } + + @Test + public void slowRoundDecreasesBudgets() { + AdaptiveWriteBudget b = new AdaptiveWriteBudget(); + for (int i = 0; i < 3; i++) { + b.recordFlush(b.currentEntryLimit(), b.currentByteLimit(), 10); + } + long beforeE = b.currentEntryLimit(); + long beforeB = b.currentByteLimit(); + b.recordFlush(beforeE, beforeB, 200); // slow + assertTrue(b.currentEntryLimit() <= beforeE); + assertTrue(b.currentByteLimit() <= beforeB); + } + + @Test + public void emaAndClampBehavior() { + AdaptiveWriteBudget b = new AdaptiveWriteBudget(); + for (int i = 0; i < 10; i++) { + b.recordFlush(100, 1024 * 1024, 30); // fast-ish + b.recordFlush(100, 1024 * 1024, 120); // slow-ish + } + assertTrue(b.currentEntryLimit() >= 1); + assertTrue(b.currentByteLimit() >= 1); + } + + @Test + public void noopOnZeroInputs() { + AdaptiveWriteBudget b = new AdaptiveWriteBudget(); + long e = b.currentEntryLimit(); + long by = b.currentByteLimit(); + b.recordFlush(0, 0, 30); + b.recordFlush(100, 1024, 0); + assertEquals(b.currentEntryLimit(), e); + 
assertEquals(b.currentByteLimit(), by); + } +} diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/KVSpaceCheckpointTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/KVSpaceCheckpointTest.java similarity index 100% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/KVSpaceCheckpointTest.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/KVSpaceCheckpointTest.java index 429d53426..5499dc115 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/KVSpaceCheckpointTest.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/KVSpaceCheckpointTest.java @@ -25,8 +25,6 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import org.apache.bifromq.basekv.localengine.TestUtil; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; import io.micrometer.core.instrument.Tags; import java.io.File; import java.lang.reflect.Method; @@ -36,6 +34,8 @@ import java.util.UUID; import java.util.function.Predicate; import lombok.SneakyThrows; +import org.apache.bifromq.basekv.localengine.TestUtil; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; import org.mockito.Mock; import org.rocksdb.Checkpoint; import org.rocksdb.ColumnFamilyDescriptor; diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/OverrideIdentityTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/OverrideIdentityTest.java similarity index 61% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/OverrideIdentityTest.java rename to 
base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/OverrideIdentityTest.java index f999f0163..4f316c4cc 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/OverrideIdentityTest.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/OverrideIdentityTest.java @@ -19,13 +19,11 @@ package org.apache.bifromq.basekv.localengine.rocksdb; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; import static org.testng.Assert.assertEquals; -import org.apache.bifromq.basekv.localengine.ICPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.ICPableKVSpace; -import org.apache.bifromq.basekv.localengine.IKVEngine; -import org.apache.bifromq.basekv.localengine.KVEngineFactory; -import org.apache.bifromq.basekv.localengine.MockableTest; +import com.google.protobuf.Struct; import java.io.File; import java.lang.reflect.Method; import java.nio.file.Files; @@ -34,13 +32,18 @@ import java.util.Comparator; import java.util.UUID; import lombok.SneakyThrows; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVEngine; +import org.apache.bifromq.basekv.localengine.KVEngineFactory; +import org.apache.bifromq.basekv.localengine.MockableTest; +import org.apache.bifromq.basekv.localengine.StructUtil; import org.testng.annotations.Test; public class OverrideIdentityTest extends MockableTest { private final String DB_NAME = "testDB"; private final String DB_CHECKPOINT_DIR = "testDB_cp"; - private IKVEngine engine; public Path dbRootDir; + private IKVEngine engine; @SneakyThrows protected void doSetup(Method method) { @@ -58,33 +61,31 @@ protected void doTeardown(Method method) { @Test public void testOverrideIdentity() { String 
overrideIdentity = UUID.randomUUID().toString(); - ICPableKVEngineConfigurator configurator = RocksDBCPableKVEngineConfigurator.builder() - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME).toString()) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR).toString()) + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(DB_ROOT_DIR, StructUtil.toValue(Paths.get(dbRootDir.toString(), DB_NAME).toString())) + .putFields(DB_CHECKPOINT_ROOT_DIR, StructUtil.toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR).toString())) .build(); - engine = KVEngineFactory.createCPable(overrideIdentity, configurator); + engine = KVEngineFactory.createCPable(overrideIdentity, "rocksdb", conf); engine.start(); assertEquals(engine.id(), overrideIdentity); engine.stop(); // restart without overrideIdentity specified - configurator = RocksDBCPableKVEngineConfigurator.builder() - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME).toString()) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR).toString()) + conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(DB_ROOT_DIR, StructUtil.toValue(Paths.get(dbRootDir.toString(), DB_NAME).toString())) + .putFields(DB_CHECKPOINT_ROOT_DIR, StructUtil.toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR).toString())) .build(); - - engine = KVEngineFactory.createCPable(null, configurator); + engine = KVEngineFactory.createCPable(null, "rocksdb", conf); engine.start(); assertEquals(engine.id(), overrideIdentity); engine.stop(); // restart with different overrideIdentity specified String another = UUID.randomUUID().toString(); - configurator = RocksDBCPableKVEngineConfigurator.builder() - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME).toString()) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR).toString()) + conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(DB_ROOT_DIR, StructUtil.toValue(Paths.get(dbRootDir.toString(), DB_NAME).toString())) + 
.putFields(DB_CHECKPOINT_ROOT_DIR, StructUtil.toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR).toString())) .build(); - - engine = KVEngineFactory.createCPable(another, configurator); + engine = KVEngineFactory.createCPable(another, "rocksdb", conf); engine.start(); assertEquals(engine.id(), overrideIdentity); @@ -93,23 +94,21 @@ public void testOverrideIdentity() { @Test public void testCanOnlyOverrideWhenInit() { - ICPableKVEngineConfigurator configurator = RocksDBCPableKVEngineConfigurator.builder() - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME).toString()) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR).toString()) + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(DB_ROOT_DIR, StructUtil.toValue(Paths.get(dbRootDir.toString(), DB_NAME).toString())) + .putFields(DB_CHECKPOINT_ROOT_DIR, StructUtil.toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR).toString())) .build(); - - engine = KVEngineFactory.createCPable(null, configurator); + engine = KVEngineFactory.createCPable(null, "rocksdb", conf); engine.start(); String identity = engine.id(); engine.stop(); // restart with overrideIdentity specified String overrideIdentity = UUID.randomUUID().toString(); - configurator = RocksDBCPableKVEngineConfigurator.builder() - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME).toString()) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR).toString()) + conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(DB_ROOT_DIR, StructUtil.toValue(Paths.get(dbRootDir.toString(), DB_NAME).toString())) + .putFields(DB_CHECKPOINT_ROOT_DIR, StructUtil.toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR).toString())) .build(); - - engine = KVEngineFactory.createCPable(overrideIdentity, configurator); + engine = KVEngineFactory.createCPable(overrideIdentity, "rocksdb", conf); engine.start(); assertEquals(engine.id(), identity); diff --git 
a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngineTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngineTest.java similarity index 74% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngineTest.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngineTest.java index ffc201350..df0974467 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngineTest.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngineTest.java @@ -19,37 +19,41 @@ package org.apache.bifromq.basekv.localengine.rocksdb; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.localengine.ICPableKVSpace; -import org.apache.bifromq.basekv.localengine.IKVEngine; -import org.apache.bifromq.basekv.localengine.IKVSpace; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; import io.reactivex.rxjava3.disposables.Disposable; import java.io.File; import java.nio.file.Paths; import lombok.SneakyThrows; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVEngine; +import org.apache.bifromq.basekv.localengine.IKVSpace; import org.testng.annotations.Test; -public class RocksDBCPableKVEngineTest extends AbstractRocksDBKVEngineTest { +public class RocksDBCPableKVEngineTest extends AbstractRocksDBCPableEngineTest { private final String DB_NAME = "testDB"; private 
final String DB_CHECKPOINT_DIR = "testDB_cp"; - private RocksDBCPableKVEngineConfigurator configurator; + private Struct conf; @SneakyThrows @Override protected void beforeStart() { super.beforeStart(); - configurator = RocksDBCPableKVEngineConfigurator.builder() - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME).toString()) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR).toString()) + conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(DB_ROOT_DIR, Value.newBuilder().setStringValue(Paths.get(dbRootDir.toString(), DB_NAME).toString()).build()) + .putFields(DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR).toString()).build()) .build(); } @SneakyThrows @Override protected IKVEngine newEngine() { - return new RocksDBCPableKVEngine(null, configurator); + return new RocksDBCPableKVEngine(null, conf); } @Test diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceCleanupInactiveTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceCleanupInactiveTest.java new file mode 100644 index 000000000..536497b79 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceCleanupInactiveTest.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVSpace.ACTIVE_GEN_POINTER; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBKVEngine.IDENTITY_FILE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.UUID; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpaceCleanupInactiveTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + tmpRoot = Files.createTempDirectory("kvspace_cleanup_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testCleanInactiveOnStartup() throws Exception { + String spaceId = "space_cleanup"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Path spaceRoot = dbRoot.resolve(spaceId); + Files.createDirectories(spaceRoot); + Files.createDirectories(cpRoot); + // Ensure engine identity exists when 
root is non-empty + Files.writeString(dbRoot.resolve(IDENTITY_FILE), "test-id"); + + String active = UUID.randomUUID().toString(); + String inactive = UUID.randomUUID().toString(); + Files.createDirectories(spaceRoot.resolve(active)); + Files.createDirectories(spaceRoot.resolve(inactive)); + Files.writeString(spaceRoot.resolve(ACTIVE_GEN_POINTER), active); + // An unrelated file under root should be removed as well + Files.writeString(spaceRoot.resolve("leftover.txt"), "x"); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + // After open, only active gen and pointer should remain + long dirs = Files.list(spaceRoot).filter(Files::isDirectory).count(); + long files = Files.list(spaceRoot).filter(Files::isRegularFile).count(); + assertEquals(dirs, 1); + assertEquals(files, 1); + assertTrue(Files.exists(spaceRoot.resolve(active))); + assertTrue(Files.exists(spaceRoot.resolve(ACTIVE_GEN_POINTER))); + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceFreshBootstrapTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceFreshBootstrapTest.java new file mode 100644 index 000000000..73232fd2c --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceFreshBootstrapTest.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVSpace.ACTIVE_GEN_POINTER; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBKVEngine.IDENTITY_FILE; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpaceFreshBootstrapTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + tmpRoot = Files.createTempDirectory("kvspace_bootstrap_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void 
testFreshBootstrapCreatesPointerAndGenDir() throws Exception { + String spaceId = "space_bootstrap"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot.resolve(spaceId)); + Files.createDirectories(cpRoot); + // Ensure engine identity exists when root is non-empty + Files.writeString(dbRoot.resolve(IDENTITY_FILE), "test-id"); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, + Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + Path pointer = dbRoot.resolve(spaceId).resolve(ACTIVE_GEN_POINTER); + assertTrue(Files.exists(pointer)); + String uuid = Files.readString(pointer).trim(); + assertFalse(uuid.isEmpty()); + assertTrue(Files.isDirectory(dbRoot.resolve(spaceId).resolve(uuid))); + + ICPableKVSpace space = engine.spaces().get(spaceId); + space.toWriter().put(ByteString.copyFromUtf8("k"), ByteString.copyFromUtf8("v")).done(); + try (IKVSpaceReader reader = space.reader()) { + assertTrue(reader.get(ByteString.copyFromUtf8("k")).isPresent()); + } + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceLegacyMigrationTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceLegacyMigrationTest.java new file mode 100644 index 000000000..c61818c8f --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceLegacyMigrationTest.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVSpace.ACTIVE_GEN_POINTER; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBKVEngine.IDENTITY_FILE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; +import org.rocksdb.Options; +import org.rocksdb.RocksDB; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpaceLegacyMigrationTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + // Prepare a temp root for db and checkpoint + tmpRoot = Files.createTempDirectory("kvspace_legacy_migration_"); + } + + @AfterMethod + public void tearDown() throws 
Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + // Cleanup temp dir recursively + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testLegacyLayoutMigratedInConstructor() throws Exception { + String spaceId = "space1"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot); + Files.createDirectories(cpRoot); + // Ensure engine identity exists when root is non-empty + Files.writeString(dbRoot.resolve(IDENTITY_FILE), "test-id"); + + // Simulate legacy layout: create a valid RocksDB directly under / + Path legacyRoot = dbRoot.resolve(spaceId); + Files.createDirectories(legacyRoot); + try (Options options = new Options().setCreateIfMissing(true)) { + try (RocksDB db = RocksDB.open(options, legacyRoot.toString())) { + // put a tiny entry to ensure DB files created + db.put("k".getBytes(), "v".getBytes()); + } + } + + // Build engine and start: constructor of space should migrate legacy layout immediately + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + Map spaces = engine.spaces(); + ICPableKVSpace space = spaces.get(spaceId); + assertNotNull(space); + + // Verify pointer file created and points to a uuid directory + Path pointer = legacyRoot.resolve(ACTIVE_GEN_POINTER); + assertTrue(Files.exists(pointer)); + String uuid = Files.readString(pointer).trim(); + assertFalse(uuid.isEmpty()); + Path currentGenDir = legacyRoot.resolve(uuid); + 
assertTrue(Files.isDirectory(currentGenDir)); + + // Write and read to ensure DB works after migration + var writer = space.toWriter(); + writer.put(ByteString.copyFromUtf8("a"), ByteString.copyFromUtf8("b")); + writer.done(); + try (IKVSpaceReader reader = space.reader()) { + var val = reader.get(ByteString.copyFromUtf8("a")); + assertTrue(val.isPresent()); + assertEquals(val.get().toStringUtf8(), "b"); + } + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpacePointerRecoveryTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpacePointerRecoveryTest.java new file mode 100644 index 000000000..91591cec7 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpacePointerRecoveryTest.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVSpace.ACTIVE_GEN_POINTER; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBKVEngine.IDENTITY_FILE; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.UUID; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpacePointerRecoveryTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + tmpRoot = Files.createTempDirectory("kvspace_ptr_recover_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testPointerPointsToMissingDir() throws Exception { + String spaceId = "space_ptr_missing"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot.resolve(spaceId)); + Files.createDirectories(cpRoot); + // Ensure engine identity exists when root is non-empty + Files.writeString(dbRoot.resolve(IDENTITY_FILE), "test-id"); + + // Write pointer to a non-existing uuid + String 
missingUUID = UUID.randomUUID().toString(); + Files.writeString(dbRoot.resolve(spaceId).resolve(ACTIVE_GEN_POINTER), missingUUID); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + ICPableKVSpace space = engine.spaces().get(spaceId); + assertNotNull(space); + + String current = Files.readString(dbRoot.resolve(spaceId).resolve(ACTIVE_GEN_POINTER)).trim(); + assertFalse(current.isEmpty()); + assertNotEquals(missingUUID, current); + assertTrue(Files.isDirectory(dbRoot.resolve(spaceId).resolve(current))); + + // simple rw + space.toWriter().put(ByteString.copyFromUtf8("k"), ByteString.copyFromUtf8("v")).done(); + try (IKVSpaceReader reader = space.reader()) { + assertEquals(reader.get(ByteString.copyFromUtf8("k")).get().toStringUtf8(), "v"); + } + } + + @Test + public void testPointerValidRemains() throws Exception { + String spaceId = "space_ptr_valid"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot); + Files.createDirectories(cpRoot); + Path spaceRoot = dbRoot.resolve(spaceId); + Files.createDirectories(spaceRoot); + // Ensure engine identity exists when root is non-empty + Files.writeString(dbRoot.resolve(IDENTITY_FILE), "test-id"); + String uuid = UUID.randomUUID().toString(); + Files.createDirectories(spaceRoot.resolve(uuid)); + Files.writeString(spaceRoot.resolve(ACTIVE_GEN_POINTER), uuid); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) 
+ .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + String after = Files.readString(spaceRoot.resolve(ACTIVE_GEN_POINTER)).trim(); + assertEquals(after, uuid); + + ICPableKVSpace space = engine.spaces().get(spaceId); + space.toWriter().put(ByteString.copyFromUtf8("a"), ByteString.copyFromUtf8("b")).done(); + try (IKVSpaceReader reader = space.reader()) { + assertEquals(reader.get(ByteString.copyFromUtf8("a")).get().toStringUtf8(), "b"); + } + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreAbortTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreAbortTest.java new file mode 100644 index 000000000..28eeec2e6 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreAbortTest.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVSpace.ACTIVE_GEN_POINTER; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpaceRestoreAbortTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + tmpRoot = Files.createTempDirectory("kvspace_restore_abort_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testAbortNoEffectAndPointerUnchanged() throws Exception { + String spaceId = "space_abort"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot); + Files.createDirectories(cpRoot); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + 
ICPableKVSpace space = engine.createIfMissing(spaceId); + Path pointer = dbRoot.resolve(spaceId).resolve(ACTIVE_GEN_POINTER); + String before = Files.readString(pointer).trim(); + + IRestoreSession session = space.startRestore((c, b) -> {}); + session.put(ByteString.copyFromUtf8("k"), ByteString.copyFromUtf8("v")); + session.abort(); + + // key not present + try (IKVSpaceReader reader = space.reader()) { + assertFalse(reader.get(ByteString.copyFromUtf8("k")).isPresent()); + } + // pointer unchanged + String after = Files.readString(pointer).trim(); + assertEquals(after, before); + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreBulkTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreBulkTest.java new file mode 100644 index 000000000..86d19cec1 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreBulkTest.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Random; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpaceRestoreBulkTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + tmpRoot = Files.createTempDirectory("kvspace_restore_bulk_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testBulkRestoreReplace() throws Exception { + String spaceId = "space_bulk"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot); + Files.createDirectories(cpRoot); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + ICPableKVSpace space = engine.createIfMissing(spaceId); + IRestoreSession session = 
space.startRestore((c, b) -> {}); + int n = 5000; + for (int i = 0; i < n; i++) { + session.put(ByteString.copyFromUtf8("k" + i), ByteString.copyFromUtf8("v" + i)); + } + session.done(); + + // random spot-check 10 keys + Random r = new Random(0); + try (IKVSpaceReader reader = space.reader()) { + for (int i = 0; i < 10; i++) { + int idx = r.nextInt(n); + assertEquals(reader.get(ByteString.copyFromUtf8("k" + idx)).get().toStringUtf8(), "v" + idx); + } + } + // ensure at least one key exists + assertTrue(space.size() > 0); + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreFlushListenerTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreFlushListenerTest.java new file mode 100644 index 000000000..316a36dd4 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreFlushListenerTest.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpaceRestoreFlushListenerTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + tmpRoot = Files.createTempDirectory("kvspace_restore_flush_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testBulkRestoreReportsFlushes() throws Exception { + String spaceId = "space_flush_bulk"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot); + Files.createDirectories(cpRoot); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + ICPableKVSpace space = engine.createIfMissing(spaceId); + AtomicInteger callbackCount = new AtomicInteger(); + 
AtomicLong totalEntries = new AtomicLong(); + AtomicLong totalBytes = new AtomicLong(); + IRestoreSession session = space.startRestore((c, b) -> { + // collect flush metrics + callbackCount.incrementAndGet(); + totalEntries.addAndGet(c); + totalBytes.addAndGet(b); + }); + + int n = 6000; // exceed default entry budget to trigger flush + for (int i = 0; i < n; i++) { + ByteString k = ByteString.copyFromUtf8("k" + i); + ByteString v = ByteString.copyFromUtf8("v" + i); + session.put(k, v); + } + session.done(); + + assertTrue(callbackCount.get() >= 1, "should flush at least once during bulk restore"); + assertTrue(totalEntries.get() > 0, "reported entries should be positive"); + assertTrue(totalBytes.get() > 0, "reported bytes should be positive"); + } + + @Test + public void testOverlayRestoreReportsFlushes() throws Exception { + String spaceId = "space_flush_overlay"; + Path dbRoot = tmpRoot.resolve("data2"); + Path cpRoot = tmpRoot.resolve("cp2"); + Files.createDirectories(dbRoot); + Files.createDirectories(cpRoot); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + ICPableKVSpace space = engine.createIfMissing(spaceId); + // pre-write something to ensure active DB is not empty + space.toWriter().put(ByteString.copyFromUtf8("pre"), ByteString.copyFromUtf8("p")).done(); + + AtomicInteger callbackCount = new AtomicInteger(); + IRestoreSession session = space.startReceiving((c, b) -> callbackCount.incrementAndGet()); + + int n = 4000; + for (int i = 0; i < n; i++) { + ByteString k = ByteString.copyFromUtf8("ok" + i); + ByteString v = ByteString.copyFromUtf8("ov" + i); + session.put(k, v); + } + session.done(); + + 
assertTrue(callbackCount.get() >= 1, "should flush at least once during overlay restore"); + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreOverlayTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreOverlayTest.java new file mode 100644 index 000000000..1f2ba4c42 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreOverlayTest.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVSpace.ACTIVE_GEN_POINTER; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotEquals; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpaceRestoreOverlayTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + tmpRoot = Files.createTempDirectory("kvspace_restore_overlay_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testOverlayKeepsExistingAndOverrides() throws Exception { + String spaceId = "space_overlay"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot); + Files.createDirectories(cpRoot); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new 
RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + ICPableKVSpace space = engine.createIfMissing(spaceId); + + // pre-existing data + ByteString keyA = ByteString.copyFromUtf8("A"); + ByteString oldA = ByteString.copyFromUtf8("oldA"); + ByteString keyB = ByteString.copyFromUtf8("B"); + ByteString oldB = ByteString.copyFromUtf8("oldB"); + ByteString mx = ByteString.copyFromUtf8("x"); + ByteString mvx = ByteString.copyFromUtf8("mx"); + space.toWriter().put(keyA, oldA).put(keyB, oldB).metadata(mx, mvx).done(); + + Path pointer = dbRoot.resolve(spaceId).resolve(ACTIVE_GEN_POINTER); + String before = Files.readString(pointer).trim(); + + IRestoreSession session = space.startReceiving((c, b) -> {}); + ByteString newA = ByteString.copyFromUtf8("newA"); + ByteString keyC = ByteString.copyFromUtf8("C"); + ByteString valC = ByteString.copyFromUtf8("valC"); + ByteString my = ByteString.copyFromUtf8("y"); + ByteString mvy = ByteString.copyFromUtf8("my"); + session.put(keyA, newA).put(keyC, valC).metadata(my, mvy); + session.done(); + + // A overridden, B kept, C added + try (IKVSpaceReader reader = space.reader()) { + assertEquals(reader.get(keyA).get().toStringUtf8(), "newA"); + assertEquals(reader.get(keyB).get().toStringUtf8(), "oldB"); + assertEquals(reader.get(keyC).get().toStringUtf8(), "valC"); + assertTrue(reader.metadata(mx).isPresent()); + assertTrue(reader.metadata(my).isPresent()); + } + + String after = Files.readString(pointer).trim(); + assertNotEquals(after, before); + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreReplaceTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreReplaceTest.java new file mode 100644 index 000000000..118f836da --- /dev/null +++ 
b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreReplaceTest.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVSpace.ACTIVE_GEN_POINTER; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotEquals; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpaceRestoreReplaceTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + 
tmpRoot = Files.createTempDirectory("kvspace_restore_replace_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testReplaceReplacesAllAndUpdatesPointer() throws Exception { + String spaceId = "space_replace"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot); + Files.createDirectories(cpRoot); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + ICPableKVSpace space = engine.createIfMissing(spaceId); + // prepare existing data + ByteString k1 = ByteString.copyFromUtf8("k1"); + ByteString v1 = ByteString.copyFromUtf8("v1"); + ByteString m1 = ByteString.copyFromUtf8("m1"); + ByteString mv1 = ByteString.copyFromUtf8("mv1"); + space.toWriter().put(k1, v1).metadata(m1, mv1).done(); + try (IKVSpaceReader reader = space.reader()) { + assertTrue(reader.exist(k1)); + } + + Path pointer = dbRoot.resolve(spaceId).resolve(ACTIVE_GEN_POINTER); + String before = Files.readString(pointer).trim(); + + // restore with replace + IRestoreSession session = space.startRestore((c, b) -> {}); + ByteString k2 = ByteString.copyFromUtf8("k2"); + ByteString v2 = ByteString.copyFromUtf8("v2"); + ByteString m2 = ByteString.copyFromUtf8("m2"); + ByteString mv2 = ByteString.copyFromUtf8("mv2"); + session.put(k2, v2); + session.metadata(m2, mv2); + session.done(); + + // old key 
removed, new key present + try (IKVSpaceReader reader = space.reader()) { + assertFalse(reader.exist(k1)); + assertTrue(reader.exist(k2)); + assertTrue(reader.metadata(m2).isPresent()); + assertFalse(reader.metadata(m1).isPresent()); + } + + String after = Files.readString(pointer).trim(); + assertNotEquals(after, before); + + // idempotent overwrite in replace mode + IRestoreSession s2 = space.startRestore((c, b) -> {}); + s2.put(k2, ByteString.copyFromUtf8("v3")); + s2.put(k2, ByteString.copyFromUtf8("v4")); + s2.done(); + try (IKVSpaceReader reader = space.reader()) { + assertEquals(reader.get(k2).get().toStringUtf8(), "v4"); + } + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreRestartCleanupTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreRestartCleanupTest.java new file mode 100644 index 000000000..c125bb215 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreRestartCleanupTest.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVSpace.ACTIVE_GEN_POINTER; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotEquals; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.stream.Stream; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpaceRestoreRestartCleanupTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + tmpRoot = Files.createTempDirectory("kvspace_restore_restart_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testRestartCleansInactiveGenerations() throws Exception { + String spaceId = "space_restart_cleanup"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot); + Files.createDirectories(cpRoot); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) 
+ .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + ICPableKVSpace space = engine.createIfMissing(spaceId); + + // initial write + space.toWriter().put(ByteString.copyFromUtf8("k"), ByteString.copyFromUtf8("v")).done(); + Path spaceRoot = dbRoot.resolve(spaceId); + String before = Files.readString(spaceRoot.resolve(ACTIVE_GEN_POINTER)).trim(); + + // restore replace to switch generation + IRestoreSession session = space.startRestore((c, b) -> {}); + session.put(ByteString.copyFromUtf8("k2"), ByteString.copyFromUtf8("v2")); + session.done(); + String after = Files.readString(spaceRoot.resolve(ACTIVE_GEN_POINTER)).trim(); + assertNotEquals(after, before); + + // stop/start to trigger cleanInactiveOnStartup + engine.stop(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + long dirCount; + long fileCount; + try (Stream s = Files.list(spaceRoot)) { + dirCount = s.filter(Files::isDirectory).count(); + } + try (Stream s = Files.list(spaceRoot)) { + fileCount = s.filter(Files::isRegularFile).count(); + } + assertEquals(dirCount, 1); + assertEquals(fileCount, 1); + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreSessionLifecycleTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreSessionLifecycleTest.java new file mode 100644 index 000000000..0b7d9b1ca --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreSessionLifecycleTest.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.testng.Assert.assertThrows; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpaceRestoreSessionLifecycleTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + tmpRoot = Files.createTempDirectory("kvspace_restore_lifecycle_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testSessionClosedAfterDoneOrAbort() throws Exception { + String spaceId = "space_lifecycle"; + Path dbRoot = 
tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot); + Files.createDirectories(cpRoot); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + ICPableKVSpace space = engine.createIfMissing(spaceId); + + // done closes session + IRestoreSession s1 = space.startRestore((c, b) -> {}); + s1.put(ByteString.copyFromUtf8("k"), ByteString.copyFromUtf8("v")); + s1.done(); + try (IKVSpaceReader reader = space.reader()) { + assertTrue(reader.get(ByteString.copyFromUtf8("k")).isPresent()); + } + assertThrows(IllegalStateException.class, () -> s1.put(ByteString.copyFromUtf8("k2"), ByteString.copyFromUtf8("v2"))); + assertThrows(IllegalStateException.class, () -> s1.metadata(ByteString.copyFromUtf8("m"), ByteString.copyFromUtf8("mv"))); + + // abort closes session + IRestoreSession s2 = space.startRestore((c, b) -> {}); + s2.put(ByteString.copyFromUtf8("x"), ByteString.copyFromUtf8("y")); + s2.abort(); + assertThrows(IllegalStateException.class, () -> s2.put(ByteString.copyFromUtf8("x2"), ByteString.copyFromUtf8("y2"))); + assertThrows(IllegalStateException.class, () -> s2.metadata(ByteString.copyFromUtf8("m2"), ByteString.copyFromUtf8("mv2"))); + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreSwitchPointerTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreSwitchPointerTest.java new file mode 100644 index 000000000..339ff8817 --- /dev/null +++ 
b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRestoreSwitchPointerTest.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVSpace.ACTIVE_GEN_POINTER; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBKVEngine.IDENTITY_FILE; +import static org.testng.Assert.assertNotEquals; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpaceRestoreSwitchPointerTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() 
throws Exception { + tmpRoot = Files.createTempDirectory("kvspace_restore_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testPointerUpdatedAfterRestoreReplace() throws Exception { + String spaceId = "space_restore"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot.resolve(spaceId)); + Files.createDirectories(cpRoot); + // Ensure engine identity exists when root is non-empty + Files.writeString(dbRoot.resolve(IDENTITY_FILE), "test-id"); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + ICPableKVSpace space = engine.spaces().get(spaceId); + + Path pointer = dbRoot.resolve(spaceId).resolve(ACTIVE_GEN_POINTER); + String before = Files.readString(pointer).trim(); + + IRestoreSession session = space.startRestore((c, b) -> {}); + session.put(ByteString.copyFromUtf8("k1"), ByteString.copyFromUtf8("v1")); + session.done(); + + String after = Files.readString(pointer).trim(); + assertNotEquals(after, before); + try (IKVSpaceRefreshableReader reader = space.reader()) { + assertTrue(reader.get(ByteString.copyFromUtf8("k1")).isPresent()); + } + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRuntimeCleanerTest.java 
b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRuntimeCleanerTest.java new file mode 100644 index 000000000..06d6a2602 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpaceRuntimeCleanerTest.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVSpace.ACTIVE_GEN_POINTER; +import static org.awaitility.Awaitility.await; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotEquals; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; +import java.util.stream.Stream; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IRestoreSession; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBCPableKVSpaceRuntimeCleanerTest { + private Path tmpRoot; + private RocksDBCPableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + tmpRoot = Files.createTempDirectory("kvspace_runtime_cleaner_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testCleanerDeletesInactiveGenerationAtRuntime() throws Exception { + String spaceId = "space_runtime_cleaner"; + Path dbRoot = tmpRoot.resolve("data"); + Path cpRoot = tmpRoot.resolve("cp"); + Files.createDirectories(dbRoot); + Files.createDirectories(cpRoot); + + Struct conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .putFields(RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR, 
Value.newBuilder().setStringValue(cpRoot.toString()).build()) + .build(); + engine = new RocksDBCPableKVEngine(null, conf); + engine.start("tag", "value"); + + ICPableKVSpace space = engine.createIfMissing(spaceId); + // write something to current generation + space.toWriter().put(ByteString.copyFromUtf8("k"), ByteString.copyFromUtf8("v")).done(); + + Path spaceRoot = dbRoot.resolve(spaceId); + Path pointer = spaceRoot.resolve(ACTIVE_GEN_POINTER); + String before = Files.readString(pointer).trim(); + assertTrue(Files.exists(spaceRoot.resolve(before))); + + // Replace restore to force generation switch + IRestoreSession session = space.startRestore((c, b) -> {}); + session.put(ByteString.copyFromUtf8("k2"), ByteString.copyFromUtf8("v2")); + session.done(); + + String after = Files.readString(pointer).trim(); + assertNotEquals(after, before); + // New generation directory should exist immediately + assertTrue(Files.exists(spaceRoot.resolve(after))); + + // Await eventual cleanup by Cleaner/GC to avoid relying on immediate deletion + await().atMost(Duration.ofSeconds(10)).pollInterval(Duration.ofMillis(100)).untilAsserted(() -> { + // Hint GC to speed up Cleaner, but do not rely on it + System.gc(); + System.runFinalization(); + + // Old generation directory should eventually be deleted at runtime by cleaner + assertTrue(Files.notExists(spaceRoot.resolve(before))); + + long dirCount; + long fileCount; + try (Stream s = Files.list(spaceRoot)) { + dirCount = s.filter(Files::isDirectory).count(); + } + try (Stream s = Files.list(spaceRoot)) { + fileCount = s.filter(Files::isRegularFile).count(); + } + assertEquals(dirCount, 1); + assertEquals(fileCount, 1); + }); + } +} diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCompactionTriggerTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCompactionTriggerTest.java similarity index 100% 
rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCompactionTriggerTest.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCompactionTriggerTest.java index bba276e1e..2cf4cb314 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCompactionTriggerTest.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceCompactionTriggerTest.java @@ -22,8 +22,8 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; -import org.apache.bifromq.basekv.localengine.MockableTest; import java.util.stream.IntStream; +import org.apache.bifromq.basekv.localengine.MockableTest; import org.mockito.Mock; import org.testng.annotations.Test; diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngineTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngineTest.java similarity index 67% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngineTest.java rename to base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngineTest.java index ca4c46187..5111207c9 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngineTest.java +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngineTest.java @@ -19,27 +19,29 @@ package org.apache.bifromq.basekv.localengine.rocksdb; -import org.apache.bifromq.basekv.localengine.IKVEngine; -import org.apache.bifromq.basekv.localengine.IKVSpace; +import com.google.protobuf.Struct; 
+import com.google.protobuf.Value; import java.nio.file.Paths; import lombok.SneakyThrows; +import org.apache.bifromq.basekv.localengine.IKVEngine; +import org.apache.bifromq.basekv.localengine.IWALableKVSpace; -public class RocksDBWALableKVEngineTest extends AbstractRocksDBKVEngineTest { - protected RocksDBWALableKVEngineConfigurator configurator; +public class RocksDBWALableKVEngineTest extends AbstractRocksDBWALableEngineTest { + protected Struct conf; @SneakyThrows @Override protected void beforeStart() { super.beforeStart(); String DB_NAME = "testDB"; - configurator = RocksDBWALableKVEngineConfigurator.builder() - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME).toString()) + conf = RocksDBDefaultConfigs.WAL.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, Value.newBuilder().setStringValue(Paths.get(dbRootDir.toString(), DB_NAME).toString()).build()) .build(); } @SneakyThrows @Override - protected IKVEngine newEngine() { - return new RocksDBWALableKVEngine(null, configurator); + protected IKVEngine newEngine() { + return new RocksDBWALableKVEngine(null, conf); } } diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVSpaceNoPointerTest.java b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVSpaceNoPointerTest.java new file mode 100644 index 000000000..400efc3ff --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVSpaceNoPointerTest.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine.rocksdb; + +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVSpace.ACTIVE_GEN_POINTER; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.nio.file.Files; +import java.nio.file.Path; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class RocksDBWALableKVSpaceNoPointerTest { + private Path tmpRoot; + private RocksDBWALableKVEngine engine; + + @BeforeMethod + public void setup() throws Exception { + tmpRoot = Files.createTempDirectory("kvspace_wal_"); + } + + @AfterMethod + public void tearDown() throws Exception { + try { + if (engine != null) { + engine.stop(); + } + } catch (Throwable ignore) { + } + if (tmpRoot != null) { + Files.walk(tmpRoot) + .sorted((a, b) -> b.getNameCount() - a.getNameCount()) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (Throwable ignore) { + } + }); + } + } + + @Test + public void testNoPointerCreatedForWALable() throws Exception { + String spaceId = "wal_space"; + Path dbRoot = tmpRoot.resolve("data"); + Files.createDirectories(dbRoot); + + Struct conf = RocksDBDefaultConfigs.WAL.toBuilder() + .putFields(RocksDBDefaultConfigs.DB_ROOT_DIR, 
Value.newBuilder().setStringValue(dbRoot.toString()).build()) + .build(); + engine = new RocksDBWALableKVEngine(null, conf); + engine.start("tag", "value"); + engine.createIfMissing(spaceId); + + Path pointer = dbRoot.resolve(spaceId).resolve(ACTIVE_GEN_POINTER); + assertFalse(Files.exists(pointer)); + // DB files should be directly under space root + assertTrue(Files.list(dbRoot.resolve(spaceId)).anyMatch(Files::isRegularFile)); + } +} diff --git a/base-kv/base-kv-local-engine-rocksdb/src/test/resources/log4j2-test.xml b/base-kv/base-kv-local-engine-rocksdb/src/test/resources/log4j2-test.xml new file mode 100644 index 000000000..1404c8668 --- /dev/null +++ b/base-kv/base-kv-local-engine-rocksdb/src/test/resources/log4j2-test.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/base-kv/base-kv-local-engine/pom.xml b/base-kv/base-kv-local-engine-spi/pom.xml similarity index 85% rename from base-kv/base-kv-local-engine/pom.xml rename to base-kv/base-kv-local-engine-spi/pom.xml index 398e6036a..4c729591d 100644 --- a/base-kv/base-kv-local-engine/pom.xml +++ b/base-kv/base-kv-local-engine-spi/pom.xml @@ -27,10 +27,13 @@ base-kv ${revision} - - base-kv-local-engine + base-kv-local-engine-spi + + org.apache.bifromq + base-hookloader + org.apache.bifromq base-kv-type-proto @@ -43,6 +46,10 @@ org.apache.bifromq base-logger + + org.pcollections + pcollections + io.reactivex.rxjava3 rxjava @@ -51,10 +58,6 @@ io.micrometer micrometer-core - - org.rocksdb - rocksdbjni - com.google.protobuf protobuf-java @@ -71,10 +74,11 @@ org.slf4j slf4j-api - + org.awaitility awaitility + test org.mockito @@ -114,6 +118,16 @@ org.xolstice.maven.plugins protobuf-maven-plugin + + maven-jar-plugin + + + + test-jar + + + + - \ No newline at end of file + diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVEngine.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVEngine.java 
similarity index 65% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVEngine.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVEngine.java index fa6c77b05..b58284fcd 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVEngine.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVEngine.java @@ -22,6 +22,8 @@ import static com.google.common.collect.Lists.newArrayList; import com.google.common.collect.Iterables; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.Tags; @@ -30,6 +32,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicReference; +import lombok.NonNull; import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; import org.apache.bifromq.logger.MDCLogger; import org.slf4j.Logger; @@ -38,20 +41,20 @@ * The abstract class of KVEngine. 
* * @param the type of KV space created by the engine - * @param the type of configurator */ -public abstract class AbstractKVEngine implements IKVEngine { +public abstract class AbstractKVEngine implements IKVEngine { protected final String overrideIdentity; - protected final C configurator; + protected final Struct engineConf; private final AtomicReference state = new AtomicReference<>(State.INIT); private final Map kvSpaceMap = new ConcurrentHashMap<>(); protected Logger log; protected String[] metricTags; private Gauge gauge; - public AbstractKVEngine(String overrideIdentity, C configurator) { + public AbstractKVEngine(String overrideIdentity, @NonNull Struct conf) { + validateConf(conf); this.overrideIdentity = overrideIdentity; - this.configurator = configurator; + this.engineConf = conf; } @Override @@ -111,34 +114,73 @@ public final T createIfMissing(String spaceId) { assertStarted(); return kvSpaceMap.computeIfAbsent(spaceId, k -> { - T space = buildKVSpace(spaceId, configurator, () -> kvSpaceMap.remove(spaceId), metricTags); + T space = buildKVSpace(spaceId, engineConf, () -> kvSpaceMap.remove(spaceId), metricTags); space.open(); return space; }); } protected final void load(String spaceId) { - T space = buildKVSpace(spaceId, configurator, () -> kvSpaceMap.remove(spaceId), metricTags); + T space = buildKVSpace(spaceId, engineConf, () -> kvSpaceMap.remove(spaceId), metricTags); space.open(); T prev = kvSpaceMap.put(spaceId, space); assert prev == null; } - private T buildKVSpace(String spaceId, C configurator, Runnable onDestroy, String... tags) { + private T buildKVSpace(String spaceId, Struct conf, Runnable onDestroy, String... 
tags) { String[] tagList = newArrayList(Iterables.concat(List.of(tags), List.of("spaceId", spaceId))).toArray(String[]::new); KVSpaceOpMeters opMeters = new KVSpaceOpMeters(spaceId, Tags.of(tagList)); Logger logger = MDCLogger.getLogger("space.logger", tagList); - return doBuildKVSpace(spaceId, configurator, onDestroy, opMeters, logger, tagList); + return doBuildKVSpace(spaceId, conf, onDestroy, opMeters, logger, tagList); } protected abstract T doBuildKVSpace(String spaceId, - C configurator, + Struct conf, Runnable onDestroy, KVSpaceOpMeters opMeters, Logger logger, String... tags); + /** + * Validate provided configuration is complete and valid. + * This method enforces structural completeness based on defaultConf, + * non-empty string constraints from nonEmptyStringKeys, then delegates to validateSemantics. + * + * @param conf caller provided conf + * @throws IllegalArgumentException if conf is invalid + */ + protected final void validateConf(Struct conf) { + if (conf == null) { + throw new IllegalArgumentException("Engine configuration must not be null"); + } + Struct defaults = defaultConf(); + // Check required keys and non-null values + for (Map.Entry e : defaults.getFieldsMap().entrySet()) { + String key = e.getKey(); + if (!conf.getFieldsMap().containsKey(key)) { + throw new IllegalArgumentException("Missing required config key: " + key); + } + Value v = conf.getFieldsMap().get(key); + if (v.getKindCase() == Value.KindCase.KIND_NOT_SET || v.hasNullValue()) { + throw new IllegalArgumentException("Config key has null value: " + key); + } + } + validateSemantics(conf); + } + + /** + * Engine default full configuration used for completeness checking. + */ + protected abstract Struct defaultConf(); + + /** + * Additional engine-specific semantic validation. 
+ */ + protected void validateSemantics(Struct conf) { + // no-op by default + } + private enum State { INIT, STARTING, STARTED, FATAL_FAILURE, STOPPING, STOPPED } diff --git a/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVSpace.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVSpace.java new file mode 100644 index 000000000..4a366deba --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVSpace.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine; + +import static io.reactivex.rxjava3.subjects.BehaviorSubject.createDefault; +import static java.util.Collections.emptyMap; + +import com.google.protobuf.ByteString; +import io.micrometer.core.instrument.Tags; +import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.subjects.BehaviorSubject; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.proto.Boundary; +import org.slf4j.Logger; + +/** + * Base implementation of IKVSpace. + * + * @param

the type of epoch + */ +public abstract class AbstractKVSpace

implements IKVSpace { + protected final String id; + protected final KVSpaceOpMeters opMeters; + protected final Logger logger; + protected final Tags tags; + private final AtomicReference state; + private final Runnable onDestroy; + private final BehaviorSubject> metadataSubject; + + public AbstractKVSpace(String id, + Runnable onDestroy, + KVSpaceOpMeters opMeters, + Logger logger, + String... tags) { + this.id = id; + this.opMeters = opMeters; + this.logger = logger; + this.onDestroy = onDestroy; + state = new AtomicReference<>(State.Init); + metadataSubject = createDefault(emptyMap()); + this.tags = Tags.of(tags).and("spaceId", id); + } + + @Override + public final String id() { + return id; + } + + @Override + public final void open() { + if (state.compareAndSet(State.Init, State.Opening)) { + doOpen(); + } + } + + @Override + public final Observable> metadata() { + return metadataSubject; + } + + @Override + public final KVSpaceDescriptor describe() { + return new KVSpaceDescriptor(id, collectStats()); + } + + @Override + public final long size() { + return opMeters.sizeCallTimer.record(() -> doSize(Boundary.getDefaultInstance())); + } + + @Override + public final void destroy() { + close(); + if (state.compareAndSet(State.Closed, State.Destroying)) { + try { + doDestroy(); + } catch (Throwable e) { + throw new KVEngineException("Destroy KVRange error", e); + } finally { + onDestroy.run(); + state.set(State.Terminated); + } + } + } + + @Override + public final void close() { + if (state.compareAndSet(State.Opening, State.Closing)) { + try { + doClose(); + metadataSubject.onComplete(); + } finally { + state.set(State.Closed); + } + } + } + + protected Map currentMetadata() { + return metadataSubject.getValue(); + } + + protected void updateMetadata(Map newMetadata) { + metadataSubject.onNext(newMetadata); + } + + protected final State state() { + return state.get(); + } + + protected abstract void doClose(); + + protected void doDestroy() { + } + + protected 
abstract P handle(); + + protected abstract void doOpen(); + + protected abstract long doSize(Boundary boundary); + + private Map collectStats() { + Map stats = new HashMap<>(); + stats.put("size", (double) size()); + // TODO: more stats + return stats; + } + + protected enum State { + Init, Opening, Destroying, Closing, Closed, Terminated + } +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVSpaceReader.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVSpaceReader.java similarity index 89% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVSpaceReader.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVSpaceReader.java index e176eb21a..f2b6df9ff 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVSpaceReader.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/AbstractKVSpaceReader.java @@ -19,12 +19,15 @@ package org.apache.bifromq.basekv.localengine; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import org.apache.bifromq.basekv.proto.Boundary; import com.google.protobuf.ByteString; import java.util.Optional; +import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; +import org.apache.bifromq.basekv.proto.Boundary; import org.slf4j.Logger; +/** + * The base implementation of IKVSpaceReader with operation metrics. 
+ */ public abstract class AbstractKVSpaceReader implements IKVSpaceReader { protected final String id; protected final KVSpaceOpMeters opMeters; @@ -46,27 +49,11 @@ public final Optional metadata(ByteString metaKey) { return opMeters.metadataCallTimer.record(() -> doMetadata(metaKey)); } - protected abstract Optional doMetadata(ByteString metaKey); - - @Override - public final long size() { - return size(Boundary.getDefaultInstance()); - } - - @Override - public final long size(Boundary boundary) { - return opMeters.sizeCallTimer.record(() -> doSize(boundary)); - } - - protected abstract long doSize(Boundary boundary); - @Override public final boolean exist(ByteString key) { return opMeters.existCallTimer.record(() -> doExist(key)); } - protected abstract boolean doExist(ByteString key); - @Override public final Optional get(ByteString key) { return opMeters.getCallTimer.record(() -> doGet(key).map(k -> { @@ -75,22 +62,32 @@ public final Optional get(ByteString key) { })); } - protected abstract Optional doGet(ByteString key); - @Override public final IKVSpaceIterator newIterator() { - return opMeters.iterNewCallTimer.record(() -> new MonitoredKeyRangeIterator(doNewIterator())); + return this.newIterator(Boundary.getDefaultInstance()); } - protected abstract IKVSpaceIterator doNewIterator(); - @Override public final IKVSpaceIterator newIterator(Boundary subBoundary) { - return opMeters.iterNewCallTimer.record(() -> new MonitoredKeyRangeIterator(doNewIterator(subBoundary))); + return opMeters.iterNewCallTimer.record( + () -> new MonitoredKeyRangeIterator(doNewIterator(subBoundary))); + } + + @Override + public final long size(Boundary boundary) { + return opMeters.sizeCallTimer.record(() -> doSize(boundary)); } + protected abstract Optional doMetadata(ByteString metaKey); + + protected abstract boolean doExist(ByteString key); + + protected abstract Optional doGet(ByteString key); + protected abstract IKVSpaceIterator doNewIterator(Boundary subBoundary); + 
protected abstract long doSize(Boundary boundary); + private class MonitoredKeyRangeIterator implements IKVSpaceIterator { final IKVSpaceIterator delegate; @@ -147,11 +144,6 @@ public void seekForPrev(ByteString target) { opMeters.iterSeekForPrevCallTimer.record(() -> delegate.seekForPrev(target)); } - @Override - public void refresh() { - opMeters.iterRefreshTimer.record(delegate::refresh); - } - @Override public void close() { delegate.close(); diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/ICPableKVSpace.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/ICPableKVSpace.java similarity index 72% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/ICPableKVSpace.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/ICPableKVSpace.java index 4f82cfa9c..0ad97b2e3 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/ICPableKVSpace.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/ICPableKVSpace.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.localengine; @@ -41,4 +41,25 @@ public interface ICPableKVSpace extends IKVSpace { * @return the range object for accessing the checkpoint */ Optional openCheckpoint(String checkpointId); + + /** + * Start a restore session for bulk restoring data into current space. + * + * @return the restore session + */ + IRestoreSession startRestore(IRestoreSession.FlushListener flushListener); + + /** + * Start a session for bulk migrating data into current space. 
+ * + * @return the restore session + */ + IRestoreSession startReceiving(IRestoreSession.FlushListener flushListener); + + /** + * Get a writer to update range state which supports data restore. + * + * @return the writer object + */ + IKVSpaceMigratableWriter toWriter(); } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVEngine.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVEngine.java similarity index 100% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVEngine.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVEngine.java diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpace.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpace.java similarity index 72% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpace.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpace.java index 058c84a48..de5454bea 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpace.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpace.java @@ -26,11 +26,28 @@ /** * The interface of KV space, which is a logical range of key-value pairs. */ -public interface IKVSpace extends IKVSpaceReader { +public interface IKVSpace extends IKVSpaceIdentifiable, IKVSpaceSizeable { + /** + * Get an observable stream of metadata key-value pairs. + * + * @return the observable stream + */ Observable> metadata(); + /** + * Get the descriptor of the space. + * + * @return the descriptor + */ KVSpaceDescriptor describe(); + /** + * Get a refreshable consistent-view reader for the space. 
+ * + * @return the reader + */ + IKVSpaceRefreshableReader reader(); + /** * Open the space. */ @@ -42,15 +59,7 @@ public interface IKVSpace extends IKVSpaceReader { void close(); /** - * Destroy the range, after destroy all data and associated resources will be cleared and released. The range object - * will transit to destroyed state. + * Destroy the space, all data and associated resources will be cleared and released. */ void destroy(); - - /** - * Get a writer to update range state. - * - * @return the writer object - */ - IKVSpaceWriter toWriter(); } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceCheckpoint.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceCheckpoint.java similarity index 77% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceCheckpoint.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceCheckpoint.java index 6a9e15825..d0abaeeb6 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceCheckpoint.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceCheckpoint.java @@ -14,11 +14,22 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.localengine; -public interface IKVSpaceCheckpoint extends IKVSpaceReader { +/** + * The interface for a space that supports checkpointing. + */ +public interface IKVSpaceCheckpoint { + + /** + * Get the checkpoint id. 
+ * + * @return the checkpoint id + */ String cpId(); + + IKVSpaceReader newReader(); } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceDataUpdatable.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceDataUpdatable.java similarity index 76% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceDataUpdatable.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceDataUpdatable.java index 3d0c39696..08bf2c91b 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceDataUpdatable.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceDataUpdatable.java @@ -19,15 +19,15 @@ package org.apache.bifromq.basekv.localengine; -import org.apache.bifromq.basekv.proto.Boundary; import com.google.protobuf.ByteString; +import org.apache.bifromq.basekv.proto.Boundary; /** - * The interface for update kv space. + * The interface for updating data in kv space. * * @param the type of the updater */ -public interface IKVSpaceDataUpdatable> extends IKVSpaceReader { +public interface IKVSpaceDataUpdatable> { /** * Insert a key-value pair in to the range. * @@ -62,15 +62,4 @@ public interface IKVSpaceDataUpdatable> exten * @param boundary the boundary */ T clear(Boundary boundary); - - /** - * Migrate data in given boundary to target space, and returns the metadata updater for target boundary. 
- * - * @param targetSpaceId the id of target range - * @param boundary the boundary of data to be migrated - * @return the metadata updater of target boundary - */ - IKVSpaceMetadataWriter migrateTo(String targetSpaceId, Boundary boundary); - - IKVSpaceMetadataWriter migrateFrom(String fromSpaceId, Boundary boundary); } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IWALableKVEngineConfigurator.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceEpoch.java similarity index 91% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IWALableKVEngineConfigurator.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceEpoch.java index dfe64000e..d5928c268 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IWALableKVEngineConfigurator.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceEpoch.java @@ -19,5 +19,8 @@ package org.apache.bifromq.basekv.localengine; -public interface IWALableKVEngineConfigurator extends IKVEngineConfigurator { +/** + * Interface for accessing KV space epoch. + */ +public interface IKVSpaceEpoch { } diff --git a/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceIdentifiable.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceIdentifiable.java new file mode 100644 index 000000000..322eeae4c --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceIdentifiable.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine; + +/** + * Interface for identifying a key-value space. + */ +public interface IKVSpaceIdentifiable { + /** + * Get the id of the space. + * + * @return the id of the space + */ + String id(); +} diff --git a/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceIterator.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceIterator.java new file mode 100644 index 000000000..79daacebc --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceIterator.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine; + +import com.google.protobuf.ByteString; + +/** + * The interface of an iterator for a KV space. + */ +public interface IKVSpaceIterator extends AutoCloseable { + /** + * Get the key of the current entry. + * + * @return the key of the current entry + */ + ByteString key(); + + /** + * Get the value of the current entry. + * + * @return the value of the current entry + */ + ByteString value(); + + /** + * Check if the iterator is valid. + * + * @return true if the iterator is valid, false otherwise + */ + boolean isValid(); + + /** + * move the iterator to the next entry. + */ + void next(); + + /** + * move the iterator to the previous entry. + */ + void prev(); + + /** + * seek to the first entry. + */ + void seekToFirst(); + + /** + * seek to the last entry. + */ + void seekToLast(); + + /** + * Seek to the first entry that is at or past target. + * + * @param target the target key + */ + void seek(ByteString target); + + /** + * Seek to the last entry that is at or before target. + * + * @param target the target key + */ + void seekForPrev(ByteString target); + + /** + * Close the iterator and release associated resources. 
+ */ + void close(); +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadataUpdatable.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadataUpdatable.java similarity index 96% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadataUpdatable.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadataUpdatable.java index 13e09b780..05822e227 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadataUpdatable.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadataUpdatable.java @@ -26,7 +26,7 @@ * * @param the type of the updater */ -public interface IKVSpaceMetadataUpdatable> extends IKVSpaceMetadata { +public interface IKVSpaceMetadataUpdatable> { /** * Update metadata in key-value pair. 
* diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadataWriter.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadataWriter.java similarity index 100% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadataWriter.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadataWriter.java diff --git a/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMigratable.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMigratable.java new file mode 100644 index 000000000..d6aaec7d7 --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMigratable.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine; + +import org.apache.bifromq.basekv.proto.Boundary; + +/** + * The interface for a space that supports data migration. 
+ */ +public interface IKVSpaceMigratable { + /** + * Migrate data in given boundary to target space, and returns the restore session for the target space. + * + * @param targetSpaceId the id of target range + * @param boundary the boundary of data to be migrated + * @return the restore session for target range + */ + IRestoreSession migrateTo(String targetSpaceId, Boundary boundary); +} diff --git a/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMigratableWriter.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMigratableWriter.java new file mode 100644 index 000000000..c5703dc54 --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMigratableWriter.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine; + +/** + * A writer for update range state, only when done method is called the changes are persisted and visible. 
+ */ +public interface IKVSpaceMigratableWriter extends IKVSpaceWriter, IKVSpaceMigratable { +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadata.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceReader.java similarity index 58% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadata.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceReader.java index 5d17c5a6f..4b356fd3b 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceMetadata.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceReader.java @@ -14,26 +14,19 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.localengine; -import org.apache.bifromq.basekv.proto.Boundary; import com.google.protobuf.ByteString; import java.util.Optional; +import org.apache.bifromq.basekv.proto.Boundary; /** - * Interface for accessing space metadata. + * The interface of a consistent-view reader for a KV space. */ -public interface IKVSpaceMetadata { - /** - * Get the id of the space. - * - * @return the id of the space - */ - String id(); - +public interface IKVSpaceReader extends IKVSpaceIdentifiable, AutoCloseable { /** * Get the metadata in key-value pair. * @@ -43,17 +36,46 @@ public interface IKVSpaceMetadata { Optional metadata(ByteString metaKey); /** - * Get the size of the space. + * Check if a key exists. 
* - * @return the size of the space + * @param key the key + * @return true if the key exists, false otherwise */ - long size(); + boolean exist(ByteString key); /** - * Get the size of the space in the specified boundary. + * Get the value of a key. + * + * @param key the key + * @return the value of the key, or empty if the key does not exist + */ + Optional get(ByteString key); + + /** + * Create a new iterator for the space. + * + * @return the iterator + */ + IKVSpaceIterator newIterator(); + + /** + * Create a new iterator for a sub-boundary of the space. + * + * @param subBoundary the sub-boundary + * @return the iterator + */ + IKVSpaceIterator newIterator(Boundary subBoundary); + + /** + * Get the estimated size of the data in the specified boundary. * * @param boundary the boundary * @return the size of the space in the specified boundary */ long size(Boundary boundary); + + /** + * Close the reader and release all resources. + */ + void close(); } diff --git a/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceRefreshableReader.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceRefreshableReader.java new file mode 100644 index 000000000..c986c610b --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceRefreshableReader.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine; + +/** + * The interface of a refreshable consistent-view reader for a KV space. + */ +public interface IKVSpaceRefreshableReader extends IKVSpaceReader { + /** + * Refresh the reader to the latest consistent-view of the KV space. + */ + void refresh(); +} diff --git a/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceSizeable.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceSizeable.java new file mode 100644 index 000000000..2cc36d988 --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceSizeable.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine; + +/** + * Interface for accessing the space size. + */ +public interface IKVSpaceSizeable { + /** + * Get the size of the space. + * + * @return the size of the space + */ + long size(); +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceWriteable.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceWriteable.java similarity index 100% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceWriteable.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceWriteable.java diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceWriter.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceWriter.java similarity index 89% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceWriter.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceWriter.java index 078b51ff6..9515da6b7 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceWriter.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceWriter.java @@ -23,5 +23,5 @@ * A writer for update range state, only when done method is called the changes are persisted and visible. 
*/ public interface IKVSpaceWriter - extends IKVSpaceMetadataWriter, IKVSpaceDataUpdatable, IKVSpaceWriteable { + extends IKVSpaceIdentifiable, IKVSpaceMetadataWriter, IKVSpaceDataUpdatable, IKVSpaceWriteable { } diff --git a/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IRestoreSession.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IRestoreSession.java new file mode 100644 index 000000000..37d903b2c --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IRestoreSession.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine; + +import com.google.protobuf.ByteString; + +/** + * Restore session with atomic visibility semantics. + */ +public interface IRestoreSession { + /** + * Put data into staging. + */ + IRestoreSession put(ByteString key, ByteString value); + + /** + * Update metadata in staging. + */ + IRestoreSession metadata(ByteString metaKey, ByteString metaValue); + + /** + * Atomically publish staging to be visible. + */ + void done(); + + /** + * Abort this restore session and drop all staged data. 
+ */ + void abort(); + + /** + * Staged operations count. + */ + int count(); + + /** + * Listener to be notified on flush. + */ + interface FlushListener { + void onFlush(int count, long bytes); + } +} diff --git a/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/ISyncContext.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/ISyncContext.java new file mode 100644 index 000000000..6b69f543e --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/ISyncContext.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine; + +import java.util.function.Supplier; + +/** + * A synchronization context that provides synchronized access to a resource with read-write locks. + * It supports refreshing the state when needed and performing mutations with proper locking. + */ +public interface ISyncContext { + /** + * Get the refresher for read operations. + * + * @return the refresher + */ + IRefresher refresher(); + + /** + * Get the mutator for write operations. 
+ * + * @return the mutator + */ + IMutator mutator(); + + /** + * Callback interface for refreshing state. + */ + interface IRefresh { + void refresh(boolean genBumped); + } + + /** + * Interface for performing read operations with refresh capability. + */ + interface IRefresher { + void runIfNeeded(IRefresh refresh); + + T call(Supplier supplier); + } + + /** + * Callback interface for performing mutations. + */ + interface IMutation { + /** + * Perform mutation, return true if generation is bumped. + * + * @return true if generation is bumped + */ + boolean mutate(); + } + + /** + * Interface for performing mutations with write locking. + */ + interface IMutator { + /** + * Perform mutation with write lock. + * + * @param mutation the mutation to perform + * @return boolean indicating if generation is bumped after the mutation + */ + boolean run(IMutation mutation); + } +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IWALableKVSpace.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IWALableKVSpace.java similarity index 88% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IWALableKVSpace.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IWALableKVSpace.java index dc4e770a9..3ae555413 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IWALableKVSpace.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/IWALableKVSpace.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.localengine; @@ -32,4 +32,11 @@ public interface IWALableKVSpace extends IKVSpace { * @return the flush start nanos time */ CompletableFuture flush(); + + /** + * Get a writer to update range state. + * + * @return the writer object + */ + IKVSpaceWriter toWriter(); } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/KVEngineException.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/KVEngineException.java similarity index 100% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/KVEngineException.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/KVEngineException.java diff --git a/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/KVEngineFactory.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/KVEngineFactory.java new file mode 100644 index 000000000..2507eb328 --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/KVEngineFactory.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine; + +import com.google.protobuf.Struct; +import java.util.Map; +import org.apache.bifromq.basehookloader.BaseHookLoader; +import org.apache.bifromq.basekv.localengine.spi.IKVEngineProvider; + +public class KVEngineFactory { + public static IKVEngine createCPable(String overrideIdentity, String type, Struct conf) { + Map providers = BaseHookLoader.load(IKVEngineProvider.class); + for (IKVEngineProvider provider : providers.values()) { + if (provider.type().equalsIgnoreCase(type)) { + return provider.createCPable(overrideIdentity, conf); + } + } + throw new UnsupportedOperationException("No CP-able KVEngineProvider found for type: " + type); + } + + public static IKVEngine createWALable(String overrideIdentity, + String type, + Struct conf) { + Map providers = BaseHookLoader.load(IKVEngineProvider.class); + for (IKVEngineProvider provider : providers.values()) { + if (provider.type().equalsIgnoreCase(type)) { + return provider.createWALable(overrideIdentity, conf); + } + } + throw new UnsupportedOperationException("No WAL-able KVEngineProvider found for type: " + type); + } +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/KVSpaceDescriptor.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/KVSpaceDescriptor.java similarity index 100% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/KVSpaceDescriptor.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/KVSpaceDescriptor.java diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/ICPableKVEngineConfigurator.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/RestoreMode.java similarity index 89% rename from 
base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/ICPableKVEngineConfigurator.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/RestoreMode.java index 2868cbafd..1c0bbf2c9 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/ICPableKVEngineConfigurator.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/RestoreMode.java @@ -19,5 +19,10 @@ package org.apache.bifromq.basekv.localengine; -public interface ICPableKVEngineConfigurator extends IKVEngineConfigurator { +/** + * Restore mode for restore session. + */ +public enum RestoreMode { + Replace, + Overlay } diff --git a/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/StructUtil.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/StructUtil.java new file mode 100644 index 000000000..530ec4d9c --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/StructUtil.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine; + +import com.google.protobuf.NullValue; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.util.Map; + +public class StructUtil { + public static Struct fromMap(Map config) { + Struct.Builder b = Struct.newBuilder(); + config.forEach((k, v) -> b.putFields(k, toValue(v))); + return b.build(); + } + + public static Value toValue(Object v) { + if (v == null) { + return Value.newBuilder().setNullValue(NullValue.NULL_VALUE).build(); + } + if (v instanceof Boolean) { + return Value.newBuilder().setBoolValue((Boolean) v).build(); + } + if (v instanceof Number) { + return Value.newBuilder().setNumberValue(((Number) v).doubleValue()).build(); + } + return Value.newBuilder().setStringValue(String.valueOf(v)).build(); + } + + public static Object fromValue(Value v) { + switch (v.getKindCase()) { + case NULL_VALUE -> { + return null; + } + case BOOL_VALUE -> { + return v.getBoolValue(); + } + case NUMBER_VALUE -> { + return coerceNumber(v.getNumberValue()); + } + case STRING_VALUE -> { + return v.getStringValue(); + } + default -> { + throw new UnsupportedOperationException("Unsupported engine config type: " + v.getKindCase()); + } + } + } + + public static boolean boolVal(Struct conf, String key) { + return conf.getFieldsOrThrow(key).getBoolValue(); + } + + public static double numVal(Struct conf, String key) { + return conf.getFieldsOrThrow(key).getNumberValue(); + } + + public static String strVal(Struct conf, String key) { + return conf.getFieldsOrThrow(key).getStringValue(); + } + + private static Object coerceNumber(double d) { + if (Double.isFinite(d) && d == Math.rint(d)) { + if (d >= Integer.MIN_VALUE && d <= Integer.MAX_VALUE) { + return (int) d; + } + if (d >= Long.MIN_VALUE && d <= Long.MAX_VALUE) { + return (long) d; + } + } + return d; + } +} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/SyncContext.java 
b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/SyncContext.java similarity index 74% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/SyncContext.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/SyncContext.java index eba4299d7..e9d8e2ae4 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/SyncContext.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/SyncContext.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.localengine; @@ -26,11 +26,23 @@ import java.util.function.Supplier; public class SyncContext implements ISyncContext { - private final AtomicLong stateModVer = new AtomicLong(); + private final AtomicLong stateGenVer = new AtomicLong(0); + private final AtomicLong stateModVer = new AtomicLong(0); private final ReadWriteLock rwLock = new ReentrantReadWriteLock(); + @Override + public IRefresher refresher() { + return new Refresher(); + } + + @Override + public IMutator mutator() { + return new Mutator(); + } + private class Refresher implements IRefresher { private final Lock rLock; + private long genVer = 0; private long readVer = -1; Refresher() { @@ -38,14 +50,16 @@ private class Refresher implements IRefresher { } @Override - public void runIfNeeded(Runnable refresh) { + public void runIfNeeded(IRefresh refresh) { rLock.lock(); try { - if (readVer == stateModVer.get()) { + boolean sameGen = genVer == stateGenVer.get(); + if (sameGen && readVer == stateModVer.get()) { return; } + genVer = stateGenVer.get(); readVer = stateModVer.get(); - refresh.run(); + refresh.refresh(!sameGen); } finally { rLock.unlock(); } 
@@ -70,24 +84,20 @@ private class Mutator implements IMutator { } @Override - public void run(Runnable mutation) { + public boolean run(IMutation mutation) { wLock.lock(); try { - mutation.run(); - stateModVer.incrementAndGet(); + boolean genBumped = mutation.mutate(); + if (genBumped) { + stateGenVer.incrementAndGet(); + stateModVer.set(0); + } else { + stateModVer.incrementAndGet(); + } + return genBumped; } finally { wLock.unlock(); } } } - - @Override - public IRefresher refresher() { - return new Refresher(); - } - - @Override - public IMutator mutator() { - return new Mutator(); - } } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/metrics/GeneralKVSpaceMetric.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/metrics/GeneralKVSpaceMetric.java similarity index 94% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/metrics/GeneralKVSpaceMetric.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/metrics/GeneralKVSpaceMetric.java index d81536033..b5732a24f 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/metrics/GeneralKVSpaceMetric.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/metrics/GeneralKVSpaceMetric.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.localengine.metrics; @@ -44,4 +44,9 @@ public String metricName() { public Meter.Type meterType() { return meterType; } + + @Override + public boolean isFunction() { + return false; + } } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/metrics/IKVSpaceMetric.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/metrics/IKVSpaceMetric.java similarity index 97% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/metrics/IKVSpaceMetric.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/metrics/IKVSpaceMetric.java index 6e8595048..c4aaf4f32 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/metrics/IKVSpaceMetric.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/metrics/IKVSpaceMetric.java @@ -38,4 +38,6 @@ public interface IKVSpaceMetric { * @return the metric type */ Meter.Type meterType(); + + boolean isFunction(); } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMeters.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMeters.java similarity index 70% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMeters.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMeters.java index 81733d24b..5589568a8 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMeters.java +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMeters.java @@ -23,6 +23,8 @@ import com.github.benmanes.caffeine.cache.Caffeine; import 
io.micrometer.core.instrument.Counter; import io.micrometer.core.instrument.DistributionSummary; +import io.micrometer.core.instrument.FunctionCounter; +import io.micrometer.core.instrument.FunctionTimer; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Meter; import io.micrometer.core.instrument.Metrics; @@ -34,6 +36,8 @@ import java.util.concurrent.Callable; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; +import java.util.function.ToDoubleFunction; +import java.util.function.ToLongFunction; public class KVSpaceMeters { private static final Cleaner CLEANER = Cleaner.create(); @@ -57,6 +61,35 @@ public static Counter getCounter(String id, IKVSpaceMetric metric, Tags tags) { .register(Metrics.globalRegistry))); } + public static FunctionCounter getFunctionCounter(String id, + IKVSpaceMetric metric, + C ctr, + ToDoubleFunction supplier, + Tags tags) { + assert metric.meterType() == Meter.Type.COUNTER && metric.isFunction(); + return (FunctionCounter) METERS.get(new MeterKey(id, metric, tags), + k -> new FunctionCounterWrapper(FunctionCounter.builder(metric.metricName(), ctr, supplier) + .tags(tags) + .tags("kvspace", id) + .register(Metrics.globalRegistry))); + } + + public static FunctionTimer getFunctionTimer(String id, + IKVSpaceMetric metric, + C timedObj, + ToLongFunction countFunction, + ToDoubleFunction totalTimeFunction, + TimeUnit timeUnit, + Tags tags) { + assert metric.meterType() == Meter.Type.TIMER && metric.isFunction(); + return (FunctionTimer) METERS.get(new MeterKey(id, metric, tags), + k -> new FunctionTimerWrapper( + FunctionTimer.builder(metric.metricName(), timedObj, countFunction, totalTimeFunction, timeUnit) + .tags(tags) + .tags("kvspace", id) + .register(Metrics.globalRegistry))); + } + public static Gauge getGauge(String id, IKVSpaceMetric metric, Supplier numProvider, Tags tags) { assert metric.meterType() == Meter.Type.GAUGE; return (Gauge) METERS.get(new MeterKey(id, metric, 
tags), @@ -75,7 +108,6 @@ public static DistributionSummary getSummary(String id, IKVSpaceMetric metric, T .register(Metrics.globalRegistry))); } - private record MeterKey(String id, IKVSpaceMetric metric, Tags tags) { } @@ -153,6 +185,68 @@ public void close() { } } + private static final class FunctionTimerWrapper implements FunctionTimer { + private final FunctionTimer delegate; + private final Cleaner.Cleanable cleanable; + + private FunctionTimerWrapper(FunctionTimer delegate) { + this.delegate = delegate; + cleanable = CLEANER.register(this, new State(delegate)); + } + + @Override + public Id getId() { + return delegate.getId(); + } + + @Override + public void close() { + delegate.close(); + cleanable.clean(); + } + + @Override + public double count() { + return delegate.count(); + } + + @Override + public double totalTime(TimeUnit unit) { + return delegate.totalTime(unit); + } + + @Override + public TimeUnit baseTimeUnit() { + return delegate.baseTimeUnit(); + } + } + + private static final class FunctionCounterWrapper implements FunctionCounter { + private final FunctionCounter delegate; + private final Cleaner.Cleanable cleanable; + + private FunctionCounterWrapper(FunctionCounter delegate) { + this.delegate = delegate; + cleanable = CLEANER.register(this, new State(delegate)); + } + + @Override + public double count() { + return delegate.count(); + } + + @Override + public Id getId() { + return delegate.getId(); + } + + @Override + public void close() { + delegate.close(); + cleanable.clean(); + } + } + private static final class CounterWrapper implements Counter { private final Counter delegate; private final Cleaner.Cleanable cleanable; @@ -258,7 +352,7 @@ public double totalAmount() { @Override public double max() { - return delegate.totalAmount(); + return delegate.max(); } @Override diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceOpMeters.java 
b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceOpMeters.java similarity index 100% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceOpMeters.java rename to base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceOpMeters.java diff --git a/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/spi/IKVEngineProvider.java b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/spi/IKVEngineProvider.java new file mode 100644 index 000000000..8a2854103 --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/main/java/org/apache/bifromq/basekv/localengine/spi/IKVEngineProvider.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.spi; + +import com.google.protobuf.Struct; +import javax.annotation.Nullable; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVEngine; +import org.apache.bifromq.basekv.localengine.IWALableKVSpace; + +/** + * Service provider for IKVEngine runtime binding. + */ +public interface IKVEngineProvider { + /** + * Engine type identifier used in configuration, such as "memory" or "rocksdb". + */ + String type(); + + /** + * The default config for CPable KVEngine. + * + * @return the default config in struct + */ + Struct defaultsForCPable(); + + /** + * The default config for WALable KVEngine. + * + * @return the default config in struct + */ + Struct defaultsForWALable(); + + /** + * Create CPable KVEngine. + * + * @param overrideIdentity the override identity could be null + * @param conf the complete config + * @return the engine instance + */ + IKVEngine createCPable(@Nullable String overrideIdentity, Struct conf); + + /** + * Create WALable KVEngine. + * + * @param overrideIdentity the override identity could be null + * @param conf the complete config + * @return the engine instance + */ + IKVEngine createWALable(String overrideIdentity, Struct conf); +} diff --git a/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/AbstractCPableEngineTest.java b/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/AbstractCPableEngineTest.java new file mode 100644 index 000000000..185009734 --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/AbstractCPableEngineTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.localengine; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import org.apache.bifromq.basekv.proto.Boundary; +import org.testng.annotations.Test; + +// CPable-specific tests; common behaviors are in AbstractKVEngineTest +public abstract class AbstractCPableEngineTest extends AbstractKVEngineTest { + @Override + protected IKVSpaceWriter writerOf(ICPableKVSpace space) { + // Restorable writer is-a IKVSpaceWriter for common operations + return space.toWriter(); + } + + @Test + public void migrateTo() { + String leftRangeId = "test_range1"; + String rightRangeId = "test_range2"; + ByteString key1 = ByteString.copyFromUtf8("1"); + ByteString value1 = ByteString.copyFromUtf8("1"); + ByteString key2 = ByteString.copyFromUtf8("6"); + ByteString value2 = ByteString.copyFromUtf8("6"); + ByteString splitKey = ByteString.copyFromUtf8("5"); + + ByteString metaKey = ByteString.copyFromUtf8("metaKey"); + ByteString metaVal = ByteString.copyFromUtf8("metaVal"); + + ICPableKVSpace leftRange = engine.createIfMissing(leftRangeId); + leftRange.toWriter() + .put(key1, value1) + .put(key2, value2) + .done(); + IKVSpaceMigratableWriter leftSpaceWriter = leftRange.toWriter(); + IRestoreSession rightSpaceRestoreSession = 
leftSpaceWriter + .migrateTo(rightRangeId, Boundary.newBuilder().setStartKey(splitKey).build()) + .metadata(metaKey, metaVal); + leftSpaceWriter.done(); + rightSpaceRestoreSession.done(); + + IKVSpace rightRange = engine.createIfMissing(rightRangeId); + + try (IKVSpaceRefreshableReader leftReader = leftRange.reader(); + IKVSpaceRefreshableReader rightReader = rightRange.reader()) { + assertFalse(leftReader.metadata(metaKey).isPresent()); + assertTrue(rightReader.metadata(metaKey).isPresent()); + assertEquals(rightReader.metadata(metaKey).get(), metaVal); + assertFalse(leftReader.exist(key2)); + assertTrue(rightReader.exist(key2)); + } + } +} diff --git a/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/AbstractKVEngineTest.java b/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/AbstractKVEngineTest.java new file mode 100644 index 000000000..2b6391cd9 --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/AbstractKVEngineTest.java @@ -0,0 +1,300 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import java.lang.reflect.Method; +import org.apache.bifromq.basekv.proto.Boundary; +import org.testng.annotations.Test; + +public abstract class AbstractKVEngineTest extends MockableTest { + // Use covariant engine type to accept concrete space subtypes + protected IKVEngine engine; + + @Override + protected void doSetup(Method method) { + beforeStart(); + engine = newEngine(); + engine.start(); + } + + protected void beforeStart() { + // no-op + } + + @Override + protected void doTeardown(Method method) { + engine.stop(); + afterStop(); + } + + protected void afterStop() { + // no-op + } + + protected abstract IKVEngine newEngine(); + + // Subclass must provide writer for specific space subtype + protected abstract IKVSpaceWriter writerOf(T space); + + // Common behavior tests below work for any IKVSpace implementation + + @Test + public void createIfMissing() { + String rangeId = "test_range1"; + IKVSpace space = engine.createIfMissing(rangeId); + IKVSpace space1 = engine.createIfMissing(rangeId); + assertEquals(space1, space); + } + + @Test + public void size() { + String rangeId = "test_range1"; + String rangeId1 = "test_range2"; + ByteString key = ByteString.copyFromUtf8("key"); + ByteString value = ByteString.copyFromUtf8("value"); + IKVSpace space = engine.createIfMissing(rangeId); + assertEquals(space.size(), 0); + writerOf((T) space).put(key, value).done(); + assertTrue(space.size() > 0); + + IKVSpace space1 = engine.createIfMissing(rangeId1); + assertEquals(space1.size(), 0); + } + + @Test + public void metadata() { + String rangeId = "test_range1"; + ByteString key = ByteString.copyFromUtf8("key"); + ByteString value = ByteString.copyFromUtf8("value"); + IKVSpace space = engine.createIfMissing(rangeId); + writerOf((T) 
space).metadata(key, value).done(); + try (IKVSpaceRefreshableReader reader = space.reader()) { + assertTrue(reader.metadata(key).isPresent()); + assertEquals(reader.metadata(key).get(), value); + } + } + + @Test + public void describe() { + String rangeId = "test_range1"; + ByteString key = ByteString.copyFromUtf8("key"); + ByteString value = ByteString.copyFromUtf8("value"); + IKVSpace space = engine.createIfMissing(rangeId); + KVSpaceDescriptor descriptor = space.describe(); + assertEquals(descriptor.id(), rangeId); + assertEquals(descriptor.metrics().get("size"), 0); + + writerOf((T) space).put(key, value).metadata(key, value).done(); + descriptor = space.describe(); + assertTrue(descriptor.metrics().get("size") > 0); + } + + @Test + public void kvSpaceDestroy() { + String rangeId = "test_range1"; + IKVSpace space = engine.createIfMissing(rangeId); + assertTrue(engine.spaces().containsKey(rangeId)); + var disposable = space.metadata().subscribe(); + space.destroy(); + assertTrue(disposable.isDisposed()); + assertTrue(engine.spaces().isEmpty()); + assertFalse(engine.spaces().containsKey(rangeId)); + } + + @Test + public void kvSpaceDestroyAndCreate() { + String rangeId = "test_range1"; + ByteString key = ByteString.copyFromUtf8("key"); + ByteString value = ByteString.copyFromUtf8("value"); + IKVSpace space = engine.createIfMissing(rangeId); + writerOf((T) space).put(key, value).done(); + try (IKVSpaceRefreshableReader reader = space.reader()) { + assertTrue(reader.exist(key)); + } + space.destroy(); + space = engine.createIfMissing(rangeId); + try (IKVSpaceRefreshableReader reader = space.reader()) { + assertFalse(reader.exist(key)); + writerOf((T) space).put(key, value).done(); + reader.refresh(); + assertTrue(reader.exist(key)); + } + } + + @Test + public void exist() { + String rangeId = "test_range1"; + ByteString key = ByteString.copyFromUtf8("key"); + ByteString value = ByteString.copyFromUtf8("value"); + IKVSpace space = engine.createIfMissing(rangeId); + 
try (IKVSpaceRefreshableReader reader = space.reader()) { + assertFalse(reader.exist(key)); + + IKVSpaceWriter writer = writerOf((T) space).put(key, value); + reader.refresh(); + assertFalse(reader.exist(key)); + + writer.done(); + reader.refresh(); + assertTrue(reader.exist(key)); + } + } + + @Test + public void get() { + String rangeId = "test_range1"; + ByteString key = ByteString.copyFromUtf8("key"); + ByteString value = ByteString.copyFromUtf8("value"); + IKVSpace space = engine.createIfMissing(rangeId); + try (IKVSpaceRefreshableReader reader = space.reader()) { + assertFalse(reader.get(key).isPresent()); + + IKVSpaceWriter writer = writerOf((T) space).put(key, value); + reader.refresh(); + assertFalse(reader.get(key).isPresent()); + + writer.done(); + reader.refresh(); + assertTrue(reader.get(key).isPresent()); + assertEquals(reader.get(key).get(), value); + } + } + + @Test + public void iterator() { + String rangeId = "test_range1"; + ByteString key = ByteString.copyFromUtf8("key"); + ByteString value = ByteString.copyFromUtf8("value"); + IKVSpace space = engine.createIfMissing(rangeId); + + try (IKVSpaceRefreshableReader reader = space.reader()) { + IKVSpaceIterator itr = reader.newIterator(); + itr.seekToFirst(); + assertFalse(itr.isValid()); + writerOf((T) space).put(key, value).done(); + + itr.seekToFirst(); + assertFalse(itr.isValid()); + reader.refresh(); + + itr.seekToFirst(); + assertTrue(itr.isValid()); + assertEquals(itr.key(), key); + assertEquals(itr.value(), value); + itr.next(); + assertFalse(itr.isValid()); + + itr.seekToLast(); + assertTrue(itr.isValid()); + assertEquals(itr.key(), key); + assertEquals(itr.value(), value); + itr.next(); + assertFalse(itr.isValid()); + + itr.seekForPrev(key); + assertTrue(itr.isValid()); + assertEquals(itr.key(), key); + assertEquals(itr.value(), value); + itr.next(); + assertFalse(itr.isValid()); + } + } + + @Test + public void iterateSubBoundary() { + String rangeId = "test_range1"; + ByteString key = 
ByteString.copyFromUtf8("key"); + ByteString value = ByteString.copyFromUtf8("value"); + IKVSpace space = engine.createIfMissing(rangeId); + + try (IKVSpaceRefreshableReader reader = space.reader()) { + IKVSpaceIterator itr = reader.newIterator(Boundary.newBuilder().setStartKey(key).build()); + itr.seekToFirst(); + assertFalse(itr.isValid()); + writerOf((T) space).put(key, value).done(); + + itr.seekToFirst(); + assertFalse(itr.isValid()); + reader.refresh(); + + itr.seekToFirst(); + assertTrue(itr.isValid()); + assertEquals(itr.key(), key); + assertEquals(itr.value(), value); + itr.next(); + assertFalse(itr.isValid()); + } + try (IKVSpaceRefreshableReader reader = space.reader()) { + IKVSpaceIterator itr = reader.newIterator(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("0")) + .setEndKey(ByteString.copyFromUtf8("9")).build()); + itr.seekToFirst(); + assertFalse(itr.isValid()); + writerOf((T) space).put(key, value).done(); + reader.refresh(); + itr.seekToFirst(); + assertFalse(itr.isValid()); + } + } + + @Test + public void writer() { + String rangeId = "test_range1"; + ByteString key = ByteString.copyFromUtf8("key"); + ByteString value = ByteString.copyFromUtf8("value"); + IKVSpace space = engine.createIfMissing(rangeId); + writerOf((T) space).put(key, value).delete(key).metadata(key, value).done(); + try (IKVSpaceRefreshableReader reader = space.reader()) { + assertFalse(reader.exist(key)); + + IKVSpaceWriter writer = writerOf((T) space); + assertEquals(reader.metadata(key).get(), value); + writer.insert(key, value).done(); + reader.refresh(); + assertTrue(reader.exist(key)); + + writerOf((T) space).clear().done(); + reader.refresh(); + assertFalse(reader.exist(key)); + } + } + + @Test + public void clearSubBoundary() { + String rangeId = "test_range1"; + ByteString key = ByteString.copyFromUtf8("key"); + ByteString value = ByteString.copyFromUtf8("value"); + IKVSpace space = engine.createIfMissing(rangeId); + writerOf((T) space).put(key, 
value).done(); + + writerOf((T) space).clear(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("0")) + .setEndKey(ByteString.copyFromUtf8("9")).build()) + .done(); + try (IKVSpaceRefreshableReader reader = space.reader()) { + assertTrue(reader.exist(key)); + } + } +} diff --git a/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/AbstractWALableEngineTest.java b/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/AbstractWALableEngineTest.java new file mode 100644 index 000000000..612507103 --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/AbstractWALableEngineTest.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine; + +public abstract class AbstractWALableEngineTest extends AbstractKVEngineTest { + @Override + protected IKVSpaceWriter writerOf(IWALableKVSpace space) { + // IWALableKVSpace provides standard writer + return space.toWriter(); + } +} diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/MockableTest.java b/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/MockableTest.java similarity index 100% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/MockableTest.java rename to base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/MockableTest.java diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/SyncContextTest.java b/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/SyncContextTest.java similarity index 70% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/SyncContextTest.java rename to base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/SyncContextTest.java index eca0edbc0..143501776 100644 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/SyncContextTest.java +++ b/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/SyncContextTest.java @@ -14,13 +14,15 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.localengine; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import org.mockito.Mock; import org.testng.annotations.Test; @@ -28,10 +30,10 @@ public class SyncContextTest extends MockableTest { @Mock - private Runnable refresh; + private ISyncContext.IRefresh refresh; @Mock - private Runnable mutate; + private ISyncContext.IMutation mutate; @Test public void refreshAsNeeded() { @@ -42,17 +44,25 @@ public void refreshAsNeeded() { refresher1.runIfNeeded(refresh); refresher2.runIfNeeded(refresh); - verify(refresh, times(2)).run(); + verify(refresh, times(2)).refresh(eq(false)); mutator.run(mutate); - verify(mutate, times(1)).run(); + verify(mutate, times(1)).mutate(); refresher1.runIfNeeded(refresh); refresher2.runIfNeeded(refresh); - verify(refresh, times(4)).run(); + verify(refresh, times(4)).refresh(eq(false)); + when(mutate.mutate()).thenReturn(false); + mutator.run(mutate); + refresher1.runIfNeeded(refresh); + refresher2.runIfNeeded(refresh); + verify(refresh, times(6)).refresh(eq(false)); + + when(mutate.mutate()).thenReturn(true); + mutator.run(mutate); refresher1.runIfNeeded(refresh); refresher2.runIfNeeded(refresh); - verify(refresh, times(4)).run(); + verify(refresh, times(2)).refresh(eq(true)); } } diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/TestUtil.java b/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/TestUtil.java similarity index 100% rename from base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/TestUtil.java rename to base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/TestUtil.java diff --git a/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMetersTest.java 
b/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMetersTest.java new file mode 100644 index 000000000..9d3a740f6 --- /dev/null +++ b/base-kv/base-kv-local-engine-spi/src/test/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMetersTest.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.localengine.metrics; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertSame; +import static org.testng.Assert.assertTrue; + +import io.micrometer.core.instrument.DistributionSummary; +import io.micrometer.core.instrument.Gauge; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Metrics; +import io.micrometer.core.instrument.Tag; +import io.micrometer.core.instrument.Tags; +import io.micrometer.core.instrument.Timer; +import io.micrometer.core.instrument.composite.CompositeMeterRegistry; +import io.micrometer.core.instrument.simple.SimpleMeterRegistry; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class KVSpaceMetersTest { + private SimpleMeterRegistry registry; + private List savedRegistries; + private String id; + private Tags baseTags; + + @BeforeMethod + public void setUp() { + registry = new SimpleMeterRegistry(); + savedRegistries = new ArrayList<>(); + CompositeMeterRegistry composite = Metrics.globalRegistry; + savedRegistries.addAll(composite.getRegistries()); + for (MeterRegistry r : savedRegistries) { + Metrics.removeRegistry(r); + } + Metrics.addRegistry(registry); + id = "kv1"; + baseTags = Tags.of("env", "test"); + } + + @AfterMethod + public void tearDown() { + Metrics.removeRegistry(registry); + registry.close(); + for (MeterRegistry r : savedRegistries) { + Metrics.addRegistry(r); + } + } + + @Test + public void timerCacheAndUnregister() { + Timer t1 = KVSpaceMeters.getTimer(id, GeneralKVSpaceMetric.CallTimer, baseTags); + Timer t2 = KVSpaceMeters.getTimer(id, GeneralKVSpaceMetric.CallTimer, baseTags); 
+ assertSame(t1, t2); + + t1.record(10, TimeUnit.MILLISECONDS); + assertTrue(t1.count() > 0); + assertTrue(t1.totalTime(TimeUnit.MILLISECONDS) > 0); + + t1.close(); + assertTrue(registry + .find(GeneralKVSpaceMetric.CallTimer.metricName()) + .tags("env", "test", "kvspace", id) + .meters().isEmpty()); + } + + @Test + public void summaryRecordAndMax() { + DistributionSummary s = + KVSpaceMeters.getSummary(id, GeneralKVSpaceMetric.ReadBytesDistribution, baseTags); + s.record(1.0); + s.record(3.0); + s.record(2.0); + + assertEquals(s.count(), 3L); + assertEquals(s.totalAmount(), 6.0, 0.0001); + assertEquals(s.max(), 3.0, 0.0001); + + assertFalse(registry + .find(GeneralKVSpaceMetric.ReadBytesDistribution.metricName()) + .tags("env", "test", "kvspace", id) + .meters().isEmpty()); + + s.close(); + assertTrue(registry + .find(GeneralKVSpaceMetric.ReadBytesDistribution.metricName()) + .tags("env", "test", "kvspace", id) + .meters().isEmpty()); + } + + @Test + public void gaugeValueUpdatesAndUnregister() { + AtomicLong value = new AtomicLong(1); + Gauge g = KVSpaceMeters.getGauge(id, GeneralKVSpaceMetric.CheckpointNumGauge, value::get, baseTags); + assertEquals(g.value(), 1.0, 0.0001); + + value.set(5); + assertEquals(g.value(), 5.0, 0.0001); + + assertFalse(registry + .find(GeneralKVSpaceMetric.CheckpointNumGauge.metricName()) + .tags("env", "test", "kvspace", id) + .meters().isEmpty()); + + g.close(); + assertTrue(registry + .find(GeneralKVSpaceMetric.CheckpointNumGauge.metricName()) + .tags("env", "test", "kvspace", id) + .meters().isEmpty()); + } + + @Test + public void tagsInjectionOnMeters() { + Timer t = KVSpaceMeters.getTimer(id, GeneralKVSpaceMetric.CallTimer, baseTags); + Map tagMap = t.getId().getTags().stream() + .collect(Collectors.toMap(Tag::getKey, Tag::getValue)); + + assertEquals(tagMap.get("env"), "test"); + assertEquals(tagMap.get("kvspace"), id); + + assertFalse(registry + .find(GeneralKVSpaceMetric.CallTimer.metricName()) + .tags("env", "test", 
"kvspace", id) + .meters().isEmpty()); + t.close(); + } +} diff --git a/base-kv/base-kv-local-engine/src/test/resources/log4j2-test.xml b/base-kv/base-kv-local-engine-spi/src/test/resources/log4j2-test.xml similarity index 100% rename from base-kv/base-kv-local-engine/src/test/resources/log4j2-test.xml rename to base-kv/base-kv-local-engine-spi/src/test/resources/log4j2-test.xml diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/ISyncContext.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/ISyncContext.java deleted file mode 100644 index 8c8093ff3..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/ISyncContext.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.localengine; - -import java.util.function.Supplier; - -public interface ISyncContext { - interface IRefresher { - void runIfNeeded(Runnable runnable); - - T call(Supplier supplier); - } - - interface IMutator { - void run(Runnable runnable); - } - - IRefresher refresher(); - - IMutator mutator(); -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/KVEngineFactory.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/KVEngineFactory.java deleted file mode 100644 index e87cd0982..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/KVEngineFactory.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.localengine; - -import org.apache.bifromq.basekv.localengine.memory.InMemCPableKVEngine; -import org.apache.bifromq.basekv.localengine.memory.InMemKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.memory.InMemWALableKVEngine; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngine; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngine; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngineConfigurator; - -public class KVEngineFactory { - public static IKVEngine createCPable(String overrideIdentity, - ICPableKVEngineConfigurator configurator) { - if (configurator instanceof InMemKVEngineConfigurator) { - return new InMemCPableKVEngine(overrideIdentity, (InMemKVEngineConfigurator) configurator); - } - if (configurator instanceof RocksDBCPableKVEngineConfigurator) { - return new RocksDBCPableKVEngine(overrideIdentity, (RocksDBCPableKVEngineConfigurator) configurator); - } - throw new UnsupportedOperationException(); - } - - public static IKVEngine createWALable(String overrideIdentity, - IWALableKVEngineConfigurator configurator) { - if (configurator instanceof InMemKVEngineConfigurator) { - return new InMemWALableKVEngine(overrideIdentity, (InMemKVEngineConfigurator) configurator); - } - if (configurator instanceof RocksDBWALableKVEngineConfigurator) { - return new RocksDBWALableKVEngine(overrideIdentity, (RocksDBWALableKVEngineConfigurator) configurator); - } - throw new UnsupportedOperationException(); - } -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVSpace.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVSpace.java deleted file mode 100644 index 13f2669b6..000000000 --- 
a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemCPableKVSpace.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.basekv.localengine.memory; - -import static org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters.getGauge; - -import org.apache.bifromq.basekv.localengine.ICPableKVSpace; -import org.apache.bifromq.basekv.localengine.IKVSpaceCheckpoint; -import org.apache.bifromq.basekv.localengine.metrics.GeneralKVSpaceMetric; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import com.github.benmanes.caffeine.cache.Cache; -import com.github.benmanes.caffeine.cache.Caffeine; -import io.micrometer.core.instrument.Gauge; -import io.micrometer.core.instrument.Tags; -import java.util.HashMap; -import java.util.Optional; -import java.util.UUID; -import org.slf4j.Logger; - -public class InMemCPableKVSpace extends InMemKVSpace - implements ICPableKVSpace { - private final Cache checkpoints; - private final Gauge checkpointGauge; - private volatile InMemKVSpaceCheckpoint latestCheckpoint; - - protected InMemCPableKVSpace(String id, - InMemKVEngineConfigurator configurator, - 
InMemCPableKVEngine engine, - Runnable onDestroy, - KVSpaceOpMeters opMeters, - Logger logger, - String... tags) { - super(id, configurator, engine, onDestroy, opMeters, logger); - checkpoints = Caffeine.newBuilder().weakValues().build(); - checkpointGauge = getGauge(id, GeneralKVSpaceMetric.CheckpointNumGauge, checkpoints::estimatedSize, - Tags.of(tags)); - } - - @Override - public String checkpoint() { - synchronized (this) { - return metadataRefresher.call(() -> { - String cpId = UUID.randomUUID().toString(); - latestCheckpoint = new InMemKVSpaceCheckpoint(id, cpId, new HashMap<>(metadataMap), rangeData.clone(), - opMeters, logger); - checkpoints.put(cpId, latestCheckpoint); - return cpId; - }); - } - } - - @Override - public Optional openCheckpoint(String checkpointId) { - return Optional.ofNullable(checkpoints.getIfPresent(checkpointId)); - } - - @Override - public void close() { - super.close(); - checkpointGauge.close(); - } -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngineConfigurator.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngineConfigurator.java deleted file mode 100644 index 2ab9c07bf..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVEngineConfigurator.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.basekv.localengine.memory; - -import org.apache.bifromq.basekv.localengine.ICPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.IWALableKVEngineConfigurator; -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; -import lombok.experimental.Accessors; -import lombok.experimental.SuperBuilder; - -@Accessors(chain = true) -@Getter -@Setter -@NoArgsConstructor -@AllArgsConstructor(access = AccessLevel.PRIVATE) -@SuperBuilder(toBuilder = true) -public final class InMemKVEngineConfigurator implements ICPableKVEngineConfigurator, IWALableKVEngineConfigurator { - @Builder.Default - private long gcIntervalInSec = 300; // ms -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpace.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpace.java deleted file mode 100644 index 69b170491..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpace.java +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.basekv.localengine.memory; - -import static com.google.protobuf.ByteString.unsignedLexicographicalComparator; - -import org.apache.bifromq.basekv.localengine.IKVSpace; -import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; -import org.apache.bifromq.basekv.localengine.ISyncContext; -import org.apache.bifromq.basekv.localengine.KVSpaceDescriptor; -import org.apache.bifromq.basekv.localengine.SyncContext; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import com.google.protobuf.ByteString; -import io.reactivex.rxjava3.core.Observable; -import io.reactivex.rxjava3.subjects.BehaviorSubject; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentSkipListMap; -import org.slf4j.Logger; - -class InMemKVSpace, T extends InMemKVSpace> extends InMemKVSpaceReader - implements IKVSpace { - protected final String id; - protected final Map metadataMap = new ConcurrentHashMap<>(); - protected final ConcurrentSkipListMap rangeData = - new ConcurrentSkipListMap<>(unsignedLexicographicalComparator()); - private final E engine; - private final BehaviorSubject> metadataSubject = BehaviorSubject.create(); - private final ISyncContext syncContext = new SyncContext(); - protected final ISyncContext.IRefresher metadataRefresher = 
syncContext.refresher(); - private final Runnable onDestroy; - - protected InMemKVSpace(String id, - InMemKVEngineConfigurator configurator, - E engine, - Runnable onDestroy, - KVSpaceOpMeters readOpMeters, - Logger logger) { - super(id, readOpMeters, logger); - this.id = id; - this.engine = engine; - this.onDestroy = onDestroy; - } - - ISyncContext syncContext() { - return syncContext; - } - - @Override - protected Map metadataMap() { - return metadataRefresher.call(() -> metadataMap); - } - - @Override - protected ConcurrentSkipListMap rangeData() { - return rangeData; - } - - - @Override - public Observable> metadata() { - return metadataSubject; - } - - @Override - public KVSpaceDescriptor describe() { - return new KVSpaceDescriptor(id, collectStats()); - } - - @Override - public void open() { - - } - - @Override - public void close() { - - } - - private Map collectStats() { - Map stats = new HashMap<>(); - stats.put("size", (double) size()); - // TODO: more stats - return stats; - } - - - @Override - public void destroy() { - metadataSubject.onComplete(); - onDestroy.run(); - } - - - @Override - public IKVSpaceWriter toWriter() { - return new InMemKVSpaceWriter<>(id, metadataMap, rangeData, engine, syncContext, - metadataUpdated -> { - if (metadataUpdated) { - this.loadMetadata(); - } - }, opMeters, logger); - } - - private void loadMetadata() { - metadataRefresher.runIfNeeded(() -> { - if (!metadataMap.isEmpty()) { - metadataSubject.onNext(Collections.unmodifiableMap(new HashMap<>(metadataMap))); - } - }); - } -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceReader.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceReader.java deleted file mode 100644 index a0d5d0136..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/memory/InMemKVSpaceReader.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.basekv.localengine.memory; - -import org.apache.bifromq.basekv.localengine.AbstractKVSpaceReader; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import org.apache.bifromq.basekv.proto.Boundary; -import com.google.protobuf.ByteString; -import java.util.Map; -import java.util.Optional; -import java.util.SortedMap; -import java.util.concurrent.ConcurrentSkipListMap; -import org.slf4j.Logger; - -abstract class InMemKVSpaceReader extends AbstractKVSpaceReader { - protected InMemKVSpaceReader(String id, KVSpaceOpMeters readOpMeters, Logger logger) { - super(id, readOpMeters, logger); - } - - protected abstract Map metadataMap(); - - protected abstract ConcurrentSkipListMap rangeData(); - - @Override - protected Optional doMetadata(ByteString metaKey) { - return Optional.ofNullable(metadataMap().get(metaKey)); - } - - @Override - protected long doSize(Boundary boundary) { - SortedMap sizedData; - ConcurrentSkipListMap rangeData = rangeData(); - if (!boundary.hasStartKey() && !boundary.hasEndKey()) { - sizedData = rangeData; - } else if 
(!boundary.hasStartKey()) { - sizedData = rangeData.headMap(boundary.getEndKey()); - } else if (!boundary.hasEndKey()) { - sizedData = rangeData.tailMap(boundary.getStartKey()); - } else { - sizedData = rangeData.subMap(boundary.getStartKey(), boundary.getEndKey()); - } - // this may take a long time - return sizedData.entrySet() - .stream() - .map(entry -> entry.getKey().size() + entry.getValue().size()) - .reduce(0, Integer::sum); - } - - @Override - protected boolean doExist(ByteString key) { - return rangeData().containsKey(key); - } - - @Override - protected Optional doGet(ByteString key) { - return Optional.ofNullable(rangeData().get(key)); - } - - @Override - protected IKVSpaceIterator doNewIterator() { - return new InMemKVSpaceIterator(rangeData()); - } - - @Override - protected IKVSpaceIterator doNewIterator(Boundary subBoundary) { - return new InMemKVSpaceIterator(rangeData(), subBoundary); - } -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpace.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpace.java deleted file mode 100644 index 600cd84da..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVSpace.java +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.basekv.localengine.rocksdb; - -import static org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters.getGauge; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.LATEST_CP_KEY; -import static java.nio.charset.StandardCharsets.UTF_8; - -import org.apache.bifromq.basekv.localengine.ICPableKVSpace; -import org.apache.bifromq.basekv.localengine.IKVSpaceCheckpoint; -import org.apache.bifromq.basekv.localengine.KVEngineException; -import org.apache.bifromq.basekv.localengine.metrics.GeneralKVSpaceMetric; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import org.apache.bifromq.basekv.localengine.rocksdb.metrics.RocksDBKVSpaceMetric; -import com.github.benmanes.caffeine.cache.Cache; -import com.github.benmanes.caffeine.cache.Caffeine; -import io.micrometer.core.instrument.Gauge; -import io.micrometer.core.instrument.Tags; -import io.micrometer.core.instrument.Timer; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.Arrays; -import java.util.Collections; -import java.util.Optional; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicReference; -import java.util.stream.Collectors; -import lombok.SneakyThrows; -import org.rocksdb.Checkpoint; -import org.rocksdb.FlushOptions; -import org.rocksdb.WriteOptions; -import org.slf4j.Logger; - -public class RocksDBCPableKVSpace - extends RocksDBKVSpace - implements ICPableKVSpace { 
- private static final String CP_SUFFIX = ".cp"; - private final RocksDBCPableKVEngine engine; - private final File cpRootDir; - private final WriteOptions writeOptions; - private final Checkpoint checkpoint; - private final AtomicReference latestCheckpointId = new AtomicReference<>(); - private final Cache checkpoints; - private final MetricManager metricMgr; - // keep a strong ref to latest checkpoint - private IKVSpaceCheckpoint latestCheckpoint; - - @SneakyThrows - public RocksDBCPableKVSpace(String id, - RocksDBCPableKVEngineConfigurator configurator, - RocksDBCPableKVEngine engine, - Runnable onDestroy, - KVSpaceOpMeters opMeters, - Logger logger, - String... tags) { - super(id, configurator, engine, onDestroy, opMeters, logger, tags); - this.engine = engine; - cpRootDir = new File(configurator.dbCheckpointRootDir(), id); - this.checkpoint = Checkpoint.create(db); - checkpoints = Caffeine.newBuilder().weakValues().build(); - writeOptions = new WriteOptions().setDisableWAL(true); - Files.createDirectories(cpRootDir.getAbsoluteFile().toPath()); - metricMgr = new MetricManager(tags); - } - - @Override - protected WriteOptions writeOptions() { - return writeOptions; - } - - @Override - public String checkpoint() { - return metricMgr.checkpointTimer.record(() -> { - synchronized (this) { - IRocksDBKVSpaceCheckpoint cp = doCheckpoint(); - checkpoints.put(cp.cpId(), cp); - latestCheckpoint = cp; - return cp.cpId(); - } - }); - } - - @Override - public Optional openCheckpoint(String checkpointId) { - return Optional.ofNullable(checkpoints.getIfPresent(checkpointId)); - } - - @Override - protected void doClose() { - logger.debug("Flush RocksDBCPableKVSpace[{}] before closing", id); - try (FlushOptions flushOptions = new FlushOptions().setWaitForFlush(true)) { - db.flush(flushOptions); - } catch (Throwable e) { - logger.error("Flush RocksDBCPableKVSpace[{}] error", id, e); - } - metricMgr.close(); - checkpoints.asMap().forEach((cpId, cp) -> cp.close()); - 
checkpoint.close(); - writeOptions.close(); - super.doClose(); - } - - @Override - protected void doDestroy() { - super.doDestroy(); - try { - deleteDir(cpRootDir.toPath()); - } catch (IOException e) { - logger.error("Failed to delete checkpoint root dir: {}", cpRootDir, e); - } - } - - @Override - protected void doLoad() { - loadLatestCheckpoint(); - super.doLoad(); - } - - private IRocksDBKVSpaceCheckpoint doCheckpoint() { - String cpId = genCheckpointId(); - File cpDir = Paths.get(cpRootDir.getAbsolutePath(), cpId).toFile(); - try { - logger.debug("KVSpace[{}] checkpoint start: checkpointId={}", id, cpId); - db.put(cfHandle, LATEST_CP_KEY, cpId.getBytes()); - checkpoint.createCheckpoint(cpDir.toString()); - latestCheckpointId.set(cpId); - return new RocksDBKVSpaceCheckpoint(id, cpId, cpDir, this::isLatest, opMeters, logger); - } catch (Throwable e) { - throw new KVEngineException("Checkpoint key range error", e); - } - } - - @SneakyThrows - private IRocksDBKVSpaceCheckpoint doLoadLatestCheckpoint() { - byte[] cpIdBytes = db.get(cfHandle, LATEST_CP_KEY); - if (cpIdBytes != null) { - try { - String cpId = new String(cpIdBytes, UTF_8); - File cpDir = Paths.get(cpRootDir.getAbsolutePath(), cpId).toFile(); - // cleanup obsolete checkpoints - for (String obsoleteId : obsoleteCheckpoints(cpId)) { - try { - cleanCheckpoint(obsoleteId); - } catch (Throwable e) { - logger.error("Clean checkpoint[{}] for kvspace[{}] error", obsoleteId, id, e); - } - } - logger.debug("Load latest checkpoint[{}] of kvspace[{}] in engine[{}] at path[{}]", - cpId, id, engine.id(), cpDir); - latestCheckpointId.set(cpId); - return new RocksDBKVSpaceCheckpoint(id, cpId, cpDir, this::isLatest, opMeters, logger); - } catch (Throwable e) { - logger.warn("Failed to load latest checkpoint, checkpoint now", e); - } - } - return doCheckpoint(); - } - - @SneakyThrows - private void loadLatestCheckpoint() { - IRocksDBKVSpaceCheckpoint checkpoint = doLoadLatestCheckpoint(); - assert 
!checkpoints.asMap().containsKey(checkpoint.cpId()); - checkpoints.put(checkpoint.cpId(), checkpoint); - latestCheckpoint = checkpoint; - } - - private String genCheckpointId() { - // we need generate global unique checkpoint id, since it will be used in raft snapshot - return UUID.randomUUID() + CP_SUFFIX; - } - - private boolean isLatest(String cpId) { - return cpId.equals(latestCheckpointId.get()); - } - - private File checkpointDir(String cpId) { - return Paths.get(cpRootDir.getAbsolutePath(), cpId).toFile(); - } - - private Iterable obsoleteCheckpoints(String skipId) { - File[] cpDirList = cpRootDir.listFiles(); - if (cpDirList == null) { - return Collections.emptyList(); - } - return Arrays.stream(cpDirList) - .filter(File::isDirectory) - .map(File::getName) - .filter(cpId -> !skipId.equals(cpId)) - .collect(Collectors.toList()); - } - - private void cleanCheckpoint(String cpId) { - logger.debug("Delete checkpoint[{}] of kvspace[{}]", cpId, id); - try { - deleteDir(checkpointDir(cpId).toPath()); - } catch (IOException e) { - logger.error("Failed to clean checkpoint[{}] for kvspace[{}] at path:{}", cpId, id, checkpointDir(cpId)); - } - } - - private class MetricManager { - private final Gauge checkpointGauge; // hold a strong reference - private final Timer checkpointTimer; - - MetricManager(String... 
metricTags) { - Tags tags = Tags.of(metricTags); - checkpointGauge = getGauge(id, GeneralKVSpaceMetric.CheckpointNumGauge, checkpoints::estimatedSize, tags); - checkpointTimer = KVSpaceMeters.getTimer(id, RocksDBKVSpaceMetric.CheckpointTimer, tags); - } - - void close() { - checkpointGauge.close(); - checkpointTimer.close(); - } - } -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpace.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpace.java deleted file mode 100644 index 03bf047a0..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpace.java +++ /dev/null @@ -1,387 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.localengine.rocksdb; - -import static com.google.protobuf.UnsafeByteOperations.unsafeWrap; -import static io.reactivex.rxjava3.subjects.BehaviorSubject.createDefault; -import static java.util.Collections.emptyMap; -import static java.util.Collections.singletonList; -import static org.apache.bifromq.basekv.localengine.IKVEngine.DEFAULT_NS; -import static org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters.getCounter; -import static org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters.getGauge; -import static org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters.getTimer; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.META_SECTION_END; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.META_SECTION_START; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.fromMetaKey; - -import com.google.common.collect.Maps; -import com.google.protobuf.ByteString; -import io.micrometer.core.instrument.Counter; -import io.micrometer.core.instrument.Gauge; -import io.micrometer.core.instrument.Metrics; -import io.micrometer.core.instrument.Tags; -import io.micrometer.core.instrument.Timer; -import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics; -import io.reactivex.rxjava3.core.Observable; -import io.reactivex.rxjava3.subjects.BehaviorSubject; -import java.io.File; -import java.io.IOException; -import java.nio.file.FileVisitResult; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.SimpleFileVisitor; -import java.nio.file.attribute.BasicFileAttributes; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -import 
java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; -import lombok.SneakyThrows; -import org.apache.bifromq.baseenv.EnvProvider; -import org.apache.bifromq.basekv.localengine.IKVSpace; -import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; -import org.apache.bifromq.basekv.localengine.ISyncContext; -import org.apache.bifromq.basekv.localengine.KVEngineException; -import org.apache.bifromq.basekv.localengine.KVSpaceDescriptor; -import org.apache.bifromq.basekv.localengine.SyncContext; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import org.apache.bifromq.basekv.localengine.rocksdb.metrics.RocksDBKVSpaceMetric; -import org.rocksdb.BlockBasedTableConfig; -import org.rocksdb.ColumnFamilyDescriptor; -import org.rocksdb.ColumnFamilyHandle; -import org.rocksdb.CompactRangeOptions; -import org.rocksdb.DBOptions; -import org.rocksdb.RocksDB; -import org.rocksdb.RocksDBException; -import org.rocksdb.WriteOptions; -import org.slf4j.Logger; - -abstract class RocksDBKVSpace< - E extends RocksDBKVEngine, - T extends RocksDBKVSpace, - C extends RocksDBKVEngineConfigurator - > - extends RocksDBKVSpaceReader implements IKVSpace { - - protected final RocksDB db; - protected final ColumnFamilyHandle cfHandle; - private final AtomicReference state = new AtomicReference<>(State.Init); - private final File keySpaceDBDir; - private final DBOptions dbOptions; - private final ColumnFamilyDescriptor cfDesc; - private final IWriteStatsRecorder writeStats; - private final ExecutorService compactionExecutor; - private final E engine; - private final Runnable onDestroy; - private final AtomicBoolean compacting = new AtomicBoolean(false); - private final BehaviorSubject> metadataSubject = createDefault(emptyMap()); - private final ISyncContext syncContext = new SyncContext(); - private final ISyncContext.IRefresher metadataRefresher = syncContext.refresher(); - private final SpaceMetrics spaceMetrics; - private 
volatile long lastCompactAt; - private volatile long nextCompactAt; - - @SneakyThrows - public RocksDBKVSpace(String id, - C configurator, - E engine, - Runnable onDestroy, - KVSpaceOpMeters opMeters, - Logger logger, - String... tags) { - super(id, opMeters, logger); - this.onDestroy = onDestroy; - this.writeStats = configurator.heuristicCompaction() ? new RocksDBKVSpaceCompactionTrigger(id, - configurator.compactMinTombstoneKeys(), - configurator.compactMinTombstoneRanges(), - configurator.compactTombstoneKeysRatio(), - this::scheduleCompact, tags) : NoopWriteStatsRecorder.INSTANCE; - this.engine = engine; - compactionExecutor = ExecutorServiceMetrics.monitor(Metrics.globalRegistry, new ThreadPoolExecutor(1, 1, - 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<>(), - EnvProvider.INSTANCE.newThreadFactory("kvspace-compactor-" + id)), - "compactor", "kvspace", Tags.of(tags)); - dbOptions = configurator.dbOptions(); - keySpaceDBDir = new File(configurator.dbRootDir(), id); - try { - Files.createDirectories(keySpaceDBDir.getAbsoluteFile().toPath()); - cfDesc = new ColumnFamilyDescriptor(DEFAULT_NS.getBytes(), configurator.cfOptions(DEFAULT_NS)); - List cfDescs = singletonList(cfDesc); - List cfHandles = new ArrayList<>(); - db = RocksDB.open(dbOptions, keySpaceDBDir.getAbsolutePath(), cfDescs, cfHandles); - assert cfHandles.size() == 1; - cfHandle = cfHandles.get(0); - } catch (Throwable e) { - throw new KVEngineException("Failed to initialize RocksDBKVSpace", e); - } - spaceMetrics = new SpaceMetrics(Tags.of(tags).and("spaceId", id)); - } - - protected static void deleteDir(Path path) throws IOException { - Files.walkFileTree(path, new SimpleFileVisitor<>() { - @Override - public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { - Files.delete(file); - return FileVisitResult.CONTINUE; - } - - @Override - public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { - Files.delete(dir); - return 
FileVisitResult.CONTINUE; - } - }); - } - - public void open() { - if (state.compareAndSet(State.Init, State.Opening)) { - doLoad(); - } - } - - public Observable> metadata() { - return metadataSubject; - } - - protected abstract WriteOptions writeOptions(); - - protected Optional doMetadata(ByteString metaKey) { - return metadataRefresher.call(() -> { - Map metaMap = metadataSubject.getValue(); - return Optional.ofNullable(metaMap.get(metaKey)); - }); - } - - @Override - public KVSpaceDescriptor describe() { - return new KVSpaceDescriptor(id, collectStats()); - } - - private Map collectStats() { - Map stats = new HashMap<>(); - stats.put("size", (double) size()); - // TODO: more stats - return stats; - } - - protected void doLoad() { - metadataRefresher.runIfNeeded(() -> { - try (RocksDBKVEngineIterator metaItr = - new RocksDBKVEngineIterator(db, cfHandle, null, META_SECTION_START, META_SECTION_END)) { - Map metaMap = new HashMap<>(); - for (metaItr.seekToFirst(); metaItr.isValid(); metaItr.next()) { - metaMap.put(fromMetaKey(metaItr.key()), unsafeWrap(metaItr.value())); - } - metadataSubject.onNext(Collections.unmodifiableMap(metaMap)); - } - }); - } - - private void updateMetadata(Map metadataUpdates) { - metadataRefresher.runIfNeeded(() -> { - if (metadataUpdates.isEmpty()) { - return; - } - Map metaMap = Maps.newHashMap(metadataSubject.getValue()); - metaMap.putAll(metadataUpdates); - metadataSubject.onNext(Collections.unmodifiableMap(metaMap)); - }); - } - - @Override - public void destroy() { - if (state.compareAndSet(State.Opening, State.Destroying)) { - try { - doDestroy(); - } catch (Throwable e) { - throw new KVEngineException("Destroy KVRange error", e); - } finally { - onDestroy.run(); - state.set(State.Terminated); - } - } - } - - @Override - public IKVSpaceWriter toWriter() { - return new RocksDBKVSpaceWriter<>(id, db, cfHandle, engine, writeOptions(), syncContext, - writeStats.newRecorder(), this::updateMetadata, opMeters, logger); - } - - public 
void close() { - if (state.compareAndSet(State.Opening, State.Closing)) { - try { - doClose(); - } finally { - state.set(State.Terminated); - } - } - } - - protected State state() { - return state.get(); - } - - protected void doClose() { - logger.debug("Close key range[{}]", id); - spaceMetrics.close(); - synchronized (compacting) { - db.destroyColumnFamilyHandle(cfHandle); - } - cfDesc.getOptions().close(); - try { - db.syncWal(); - } catch (RocksDBException e) { - logger.error("SyncWAL RocksDBKVSpace[{}] error", id, e); - } - db.close(); - dbOptions.close(); - metadataSubject.onComplete(); - } - - protected void doDestroy() { - doClose(); - try { - deleteDir(keySpaceDBDir.toPath()); - } catch (IOException e) { - logger.error("Failed to delete key range dir: {}", keySpaceDBDir, e); - } - } - - @Override - protected RocksDB db() { - return db; - } - - @Override - protected ColumnFamilyHandle cfHandle() { - return cfHandle; - } - - @Override - protected ISyncContext.IRefresher newRefresher() { - return syncContext.refresher(); - } - - private void scheduleCompact() { - if (state.get() != State.Opening) { - return; - } - spaceMetrics.compactionSchedCounter.increment(); - if (compacting.compareAndSet(false, true)) { - compactionExecutor.execute(spaceMetrics.compactionTimer.wrap(() -> { - logger.debug("KeyRange[{}] compaction start", id); - lastCompactAt = System.nanoTime(); - writeStats.reset(); - try (CompactRangeOptions options = new CompactRangeOptions() - .setBottommostLevelCompaction(CompactRangeOptions.BottommostLevelCompaction.kSkip) - .setExclusiveManualCompaction(false)) { - synchronized (compacting) { - if (state.get() == State.Opening) { - db.compactRange(cfHandle, null, null, options); - } - } - logger.debug("KeyRange[{}] compacted", id); - } catch (Throwable e) { - logger.error("KeyRange[{}] compaction error", id, e); - } finally { - compacting.set(false); - if (nextCompactAt > lastCompactAt) { - scheduleCompact(); - } - } - })); - } else { - 
nextCompactAt = System.nanoTime(); - } - } - - protected enum State { - Init, Opening, Destroying, Closing, Terminated - } - - private class SpaceMetrics { - private final Gauge blockCacheSizeGauge; - private final Gauge tableReaderSizeGauge; - private final Gauge memtableSizeGauges; - private final Gauge pinedMemorySizeGauges; - private final Counter compactionSchedCounter; - private final Timer compactionTimer; - - SpaceMetrics(Tags metricTags) { - compactionSchedCounter = getCounter(id, RocksDBKVSpaceMetric.CompactionCounter, metricTags); - compactionTimer = getTimer(id, RocksDBKVSpaceMetric.CompactionTimer, metricTags); - blockCacheSizeGauge = getGauge(id, RocksDBKVSpaceMetric.BlockCache, () -> { - try { - if (!((BlockBasedTableConfig) cfDesc.getOptions().tableFormatConfig()).noBlockCache()) { - return db.getLongProperty(cfHandle, "rocksdb.block-cache-usage"); - } - return 0; - } catch (RocksDBException e) { - logger.warn("Unable to get long property {}", "rocksdb.block-cache-usage", e); - return 0; - } - }, metricTags); - tableReaderSizeGauge = getGauge(id, RocksDBKVSpaceMetric.TableReader, () -> { - try { - return db.getLongProperty(cfHandle, "rocksdb.estimate-table-readers-mem"); - } catch (RocksDBException e) { - logger.warn("Unable to get long property {}", "rocksdb.estimate-table-readers-mem", e); - return 0; - } - }, metricTags); - memtableSizeGauges = getGauge(id, RocksDBKVSpaceMetric.MemTable, () -> { - try { - return db.getLongProperty(cfHandle, "rocksdb.cur-size-all-mem-tables"); - } catch (RocksDBException e) { - logger.warn("Unable to get long property {}", "rocksdb.cur-size-all-mem-tables", e); - return 0; - } - }, metricTags); - pinedMemorySizeGauges = getGauge(id, RocksDBKVSpaceMetric.PinnedMem, () -> { - try { - if (!((BlockBasedTableConfig) cfDesc.getOptions().tableFormatConfig()).noBlockCache()) { - return db.getLongProperty(cfHandle, "rocksdb.block-cache-pinned-usage"); - } - return 0; - } catch (RocksDBException e) { - logger.warn("Unable 
to get long property {}", "rocksdb.block-cache-pinned-usage", e); - return 0; - } - }, metricTags); - } - - void close() { - blockCacheSizeGauge.close(); - memtableSizeGauges.close(); - tableReaderSizeGauge.close(); - pinedMemorySizeGauges.close(); - compactionSchedCounter.close(); - compactionTimer.close(); - } - } -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceIterator.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceIterator.java deleted file mode 100644 index 88919132a..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceIterator.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.localengine.rocksdb; - -import static com.google.protobuf.UnsafeByteOperations.unsafeWrap; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.DATA_SECTION_END; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.DATA_SECTION_START; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.fromDataKey; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.toDataKey; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.endKeyBytes; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.startKeyBytes; - -import com.google.protobuf.ByteString; -import java.lang.ref.Cleaner; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.ISyncContext; -import org.apache.bifromq.basekv.proto.Boundary; -import org.rocksdb.ColumnFamilyHandle; -import org.rocksdb.RocksDB; -import org.rocksdb.Snapshot; - -class RocksDBKVSpaceIterator implements IKVSpaceIterator { - private static final Cleaner CLEANER = Cleaner.create(); - private final RocksDBKVEngineIterator rocksItr; - private final ISyncContext.IRefresher refresher; - private final Cleaner.Cleanable onClose; - - public RocksDBKVSpaceIterator(RocksDB db, - ColumnFamilyHandle cfHandle, - Boundary boundary, - ISyncContext.IRefresher refresher) { - this(db, cfHandle, null, boundary, refresher); - } - - public RocksDBKVSpaceIterator(RocksDB db, - ColumnFamilyHandle cfHandle, - Snapshot snapshot, - Boundary boundary, - ISyncContext.IRefresher refresher) { - this(db, cfHandle, snapshot, boundary, refresher, true); - } - - public RocksDBKVSpaceIterator(RocksDB db, - ColumnFamilyHandle cfHandle, - Snapshot snapshot, - Boundary boundary, - ISyncContext.IRefresher refresher, - boolean fillCache) { - byte[] startKey = startKeyBytes(boundary); - byte[] endKey = endKeyBytes(boundary); - startKey = startKey != null ? toDataKey(startKey) : DATA_SECTION_START; - endKey = endKey != null ? 
toDataKey(endKey) : DATA_SECTION_END; - this.rocksItr = new RocksDBKVEngineIterator(db, cfHandle, snapshot, startKey, endKey, fillCache); - this.refresher = refresher; - onClose = CLEANER.register(this, new State(rocksItr)); - } - - @Override - public ByteString key() { - return fromDataKey(rocksItr.key()); - } - - @Override - public ByteString value() { - return unsafeWrap(rocksItr.value()); - } - - @Override - public boolean isValid() { - return rocksItr.isValid(); - } - - @Override - public void next() { - rocksItr.next(); - } - - @Override - public void prev() { - rocksItr.prev(); - } - - @Override - public void seekToFirst() { - rocksItr.seekToFirst(); - } - - @Override - public void seekToLast() { - rocksItr.seekToLast(); - } - - @Override - public void seek(ByteString target) { - rocksItr.seek(toDataKey(target)); - } - - @Override - public void seekForPrev(ByteString target) { - rocksItr.seekForPrev(toDataKey(target)); - } - - @Override - public void refresh() { - refresher.runIfNeeded(rocksItr::refresh); - } - - @Override - public void close() { - onClose.clean(); - } - - private record State(RocksDBKVEngineIterator rocksItr) implements Runnable { - @Override - public void run() { - rocksItr.close(); - } - } -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceReader.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceReader.java deleted file mode 100644 index 8c1080c88..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceReader.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.basekv.localengine.rocksdb; - -import static com.google.protobuf.UnsafeByteOperations.unsafeWrap; -import static java.util.Collections.singletonList; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.DATA_SECTION_END; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.DATA_SECTION_START; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.toDataKey; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.compare; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.isValid; -import static org.rocksdb.SizeApproximationFlag.INCLUDE_FILES; -import static org.rocksdb.SizeApproximationFlag.INCLUDE_MEMTABLES; - -import com.google.protobuf.ByteString; -import java.util.Optional; -import org.apache.bifromq.basekv.localengine.AbstractKVSpaceReader; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.ISyncContext; -import org.apache.bifromq.basekv.localengine.KVEngineException; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import org.apache.bifromq.basekv.proto.Boundary; -import org.rocksdb.ColumnFamilyHandle; -import org.rocksdb.Range; -import org.rocksdb.RocksDB; -import org.rocksdb.RocksDBException; -import org.rocksdb.Slice; -import org.slf4j.Logger; - -abstract class RocksDBKVSpaceReader extends AbstractKVSpaceReader { 
- - protected RocksDBKVSpaceReader(String id, KVSpaceOpMeters opMeters, Logger logger) { - super(id, opMeters, logger); - } - - protected abstract RocksDB db(); - - protected abstract ColumnFamilyHandle cfHandle(); - - protected abstract ISyncContext.IRefresher newRefresher(); - - protected final long doSize(Boundary boundary) { - byte[] start = - !boundary.hasStartKey() ? DATA_SECTION_START : toDataKey(boundary.getStartKey().toByteArray()); - byte[] end = - !boundary.hasEndKey() ? DATA_SECTION_END : toDataKey(boundary.getEndKey().toByteArray()); - if (compare(start, end) < 0) { - try (Slice startSlice = new Slice(start); Slice endSlice = new Slice(end)) { - Range range = new Range(startSlice, endSlice); - return db().getApproximateSizes(cfHandle(), singletonList(range), INCLUDE_MEMTABLES, INCLUDE_FILES)[0]; - } - } - return 0; - } - - @Override - protected final boolean doExist(ByteString key) { - return get(key).isPresent(); - } - - @Override - protected final Optional doGet(ByteString key) { - try { - byte[] data = db().get(cfHandle(), toDataKey(key)); - return Optional.ofNullable(data == null ? 
null : unsafeWrap(data)); - } catch (RocksDBException rocksDBException) { - throw new KVEngineException("Get failed", rocksDBException); - } - } - - @Override - protected IKVSpaceIterator doNewIterator() { - return new RocksDBKVSpaceIterator(db(), cfHandle(), Boundary.getDefaultInstance(), newRefresher()); - } - - @Override - protected IKVSpaceIterator doNewIterator(Boundary subBoundary) { - assert isValid(subBoundary); - return new RocksDBKVSpaceIterator(db(), cfHandle(), subBoundary, newRefresher()); - } -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceSnapshot.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceSnapshot.java deleted file mode 100644 index 3013d5ef9..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceSnapshot.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.localengine.rocksdb; - -import static com.google.protobuf.UnsafeByteOperations.unsafeWrap; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.toDataKey; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.toMetaKey; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.isValid; - -import com.google.protobuf.ByteString; -import java.lang.ref.Cleaner; -import java.util.Optional; -import java.util.function.Supplier; -import org.apache.bifromq.basekv.localengine.AbstractKVSpaceReader; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.ISyncContext; -import org.apache.bifromq.basekv.localengine.KVEngineException; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import org.apache.bifromq.basekv.proto.Boundary; -import org.rocksdb.ColumnFamilyHandle; -import org.rocksdb.ReadOptions; -import org.rocksdb.RocksDB; -import org.rocksdb.RocksDBException; -import org.rocksdb.Snapshot; -import org.slf4j.Logger; - -class RocksDBKVSpaceSnapshot extends AbstractKVSpaceReader implements IRocksDBKVSpaceCheckpoint { - private static final Cleaner CLEANER = Cleaner.create(); - private static final ISyncContext.IRefresher DUMMY_REFRESHER = new ISyncContext.IRefresher() { - @Override - public void runIfNeeded(Runnable runnable) { - // no need to do any refresh, since it's readonly - } - - @Override - public T call(Supplier supplier) { - return supplier.get(); - } - }; - private final Snapshot snapshot; - private final ColumnFamilyHandle cfHandle; - private final RocksDB db; - private final ReadOptions readOptions; - private final Cleaner.Cleanable cleanable; - - RocksDBKVSpaceSnapshot(String id, - Snapshot snapshot, - ColumnFamilyHandle cfHandle, - RocksDB db, - KVSpaceOpMeters readOpMeters, - Logger logger) { - super(id, readOpMeters, logger); - this.snapshot = snapshot; - this.cfHandle = cfHandle; - this.db = db; - 
this.readOptions = new ReadOptions().setSnapshot(snapshot); - cleanable = CLEANER.register(this, new ClosableResources(readOptions, snapshot, db)); - } - - @Override - public String cpId() { - return Long.toUnsignedString(snapshot.getSequenceNumber()); - } - - @Override - public void close() { - cleanable.clean(); - } - - @Override - protected Optional doMetadata(ByteString metaKey) { - try { - byte[] valBytes = db.get(cfHandle, readOptions, toMetaKey(metaKey)); - if (valBytes == null) { - return Optional.empty(); - } - return Optional.of(unsafeWrap(valBytes)); - } catch (RocksDBException e) { - throw new KVEngineException("Failed to read metadata", e); - } - } - - @Override - protected long doSize(Boundary boundary) { - throw new UnsupportedOperationException("Getting size of snapshot is unsupported"); - } - - @Override - protected boolean doExist(ByteString key) { - return get(key).isPresent(); - } - - @Override - protected Optional doGet(ByteString key) { - try { - byte[] data = db.get(cfHandle, readOptions, toDataKey(key)); - return Optional.ofNullable(data == null ? 
null : unsafeWrap(data)); - } catch (RocksDBException rocksDBException) { - throw new KVEngineException("Get failed", rocksDBException); - } - } - - @Override - protected IKVSpaceIterator doNewIterator() { - return new RocksDBKVSpaceIterator(db, cfHandle, snapshot, Boundary.getDefaultInstance(), DUMMY_REFRESHER); - } - - @Override - protected IKVSpaceIterator doNewIterator(Boundary subBoundary) { - assert isValid(subBoundary); - return new RocksDBKVSpaceIterator(db, cfHandle, snapshot, subBoundary, DUMMY_REFRESHER); - } - - private record ClosableResources(ReadOptions readOptions, Snapshot snapshot, RocksDB db) implements Runnable { - @Override - public void run() { - readOptions.close(); - db.releaseSnapshot(snapshot); - } - } -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceWriter.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceWriter.java deleted file mode 100644 index c9679356a..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBKVSpaceWriter.java +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.basekv.localengine.rocksdb; - -import static com.google.protobuf.UnsafeByteOperations.unsafeWrap; -import static org.apache.bifromq.basekv.localengine.rocksdb.Keys.toMetaKey; - -import com.google.protobuf.ByteString; -import java.util.Map; -import java.util.Optional; -import java.util.function.Consumer; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.IKVSpaceMetadataWriter; -import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; -import org.apache.bifromq.basekv.localengine.ISyncContext; -import org.apache.bifromq.basekv.localengine.KVEngineException; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import org.apache.bifromq.basekv.proto.Boundary; -import org.rocksdb.ColumnFamilyHandle; -import org.rocksdb.RocksDB; -import org.rocksdb.RocksDBException; -import org.rocksdb.WriteOptions; -import org.slf4j.Logger; - -class RocksDBKVSpaceWriter, T extends - RocksDBKVSpace, C extends RocksDBKVEngineConfigurator> - extends RocksDBKVSpaceReader implements IKVSpaceWriter { - private static final long MIGRATION_FLUSH_BYTES = 4L * 1024 * 1024; - private static final int MIGRATION_FLUSH_OPS = 4096; - private final RocksDB db; - private final ColumnFamilyHandle cfHandle; - private final ISyncContext syncContext; - private final E engine; - private final RocksDBKVSpaceWriterHelper helper; - private final IWriteStatsRecorder.IRecorder writeStatsRecorder; - - RocksDBKVSpaceWriter(String id, RocksDB db, ColumnFamilyHandle cfHandle, E engine, WriteOptions writeOptions, - ISyncContext syncContext, IWriteStatsRecorder.IRecorder writeStatsRecorder, - Consumer> afterWrite, KVSpaceOpMeters opMeters, Logger logger) { - this(id, db, cfHandle, engine, syncContext, new RocksDBKVSpaceWriterHelper(db, writeOptions), - writeStatsRecorder, afterWrite, opMeters, 
logger); - } - - private RocksDBKVSpaceWriter(String id, RocksDB db, ColumnFamilyHandle cfHandle, E engine, ISyncContext syncContext, - RocksDBKVSpaceWriterHelper writerHelper, - IWriteStatsRecorder.IRecorder writeStatsRecorder, - Consumer> afterWrite, KVSpaceOpMeters opMeters, - Logger logger) { - super(id, opMeters, logger); - this.db = db; - this.cfHandle = cfHandle; - this.engine = engine; - this.syncContext = syncContext; - this.helper = writerHelper; - this.writeStatsRecorder = writeStatsRecorder; - writerHelper.addMutators(syncContext.mutator()); - writerHelper.addAfterWriteCallback(cfHandle, afterWrite); - } - - @Override - public IKVSpaceWriter metadata(ByteString metaKey, ByteString metaValue) { - try { - helper.metadata(cfHandle(), metaKey, metaValue); - return this; - } catch (RocksDBException e) { - throw new KVEngineException("Put in batch failed", e); - } - } - - @Override - public IKVSpaceWriter insert(ByteString key, ByteString value) { - try { - helper.insert(cfHandle(), key, value); - writeStatsRecorder.recordInsert(); - return this; - } catch (RocksDBException e) { - throw new KVEngineException("Insert in batch failed", e); - } - } - - @Override - public IKVSpaceWriter put(ByteString key, ByteString value) { - try { - helper.put(cfHandle(), key, value); - writeStatsRecorder.recordPut(); - return this; - } catch (RocksDBException e) { - throw new KVEngineException("Put in batch failed", e); - } - } - - @Override - public IKVSpaceWriter delete(ByteString key) { - try { - helper.delete(cfHandle(), key); - writeStatsRecorder.recordDelete(); - return this; - } catch (RocksDBException e) { - throw new KVEngineException("Single delete in batch failed", e); - } - } - - @Override - public IKVSpaceWriter clear() { - return clear(Boundary.getDefaultInstance()); - } - - @Override - public IKVSpaceWriter clear(Boundary boundary) { - try { - helper.clear(cfHandle(), boundary); - writeStatsRecorder.recordDeleteRange(); - } catch (Throwable e) { - throw new 
KVEngineException("Delete range in batch failed", e); - } - return this; - } - - @Override - public IKVSpaceMetadataWriter migrateTo(String targetSpaceId, Boundary boundary) { - try { - RocksDBKVSpace targetKVSpace = engine.createIfMissing(targetSpaceId); - RocksDBKVSpaceWriter targetKVSpaceWriter = (RocksDBKVSpaceWriter) targetKVSpace.toWriter(); - // move data - int c = 0; - try (IKVSpaceIterator itr = newIterator(boundary)) { - for (itr.seekToFirst(); itr.isValid(); itr.next()) { - targetKVSpaceWriter.put(itr.key(), itr.value()); - targetKVSpaceWriter.flushIfNeededForMigration(); - c++; - } - } - logger.debug("Migrate {} kv to range[{}] from range[{}]: startKey={}, endKey={}", c, targetSpaceId, id, - boundary.getStartKey().toStringUtf8(), boundary.getEndKey().toStringUtf8()); - // clear moved data in left range - helper.clear(cfHandle(), boundary); - return targetKVSpaceWriter; - } catch (Throwable e) { - throw new KVEngineException("Delete range in batch failed", e); - } - } - - @Override - public IKVSpaceMetadataWriter migrateFrom(String fromSpaceId, Boundary boundary) { - try { - RocksDBKVSpace sourceKVSpace = engine.createIfMissing(fromSpaceId); - IKVSpaceWriter sourceKVSpaceWriter = sourceKVSpace.toWriter(); - // move data - try (IKVSpaceIterator itr = sourceKVSpace.newIterator(boundary)) { - for (itr.seekToFirst(); itr.isValid(); itr.next()) { - helper.put(cfHandle(), itr.key(), itr.value()); - flushIfNeededForMigration(); - } - } - // clear moved data in right range - sourceKVSpaceWriter.clear(boundary); - return sourceKVSpaceWriter; - } catch (Throwable e) { - throw new KVEngineException("Delete range in batch failed", e); - } - } - - @Override - public void done() { - opMeters.batchWriteCallTimer.record(() -> { - try { - opMeters.writeBatchSizeSummary.record(helper.count()); - helper.done(); - writeStatsRecorder.stop(); - } catch (Throwable e) { - logger.error("Write Batch commit failed", e); - throw new KVEngineException("Batch commit failed", e); - } 
- }); - } - - @Override - public void abort() { - helper.abort(); - } - - @Override - public int count() { - return helper.count(); - } - - @Override - protected Optional doMetadata(ByteString metaKey) { - try { - byte[] metaValBytes = db.get(cfHandle, toMetaKey(metaKey)); - return Optional.ofNullable(metaValBytes == null ? null : unsafeWrap(metaValBytes)); - } catch (RocksDBException e) { - throw new RuntimeException(e); - } - } - - @Override - protected RocksDB db() { - return db; - } - - @Override - protected ColumnFamilyHandle cfHandle() { - return cfHandle; - } - - private void flushIfNeededForMigration() { - // ensure metadata changes are flushed atomically - if (!helper.hasPendingMetadata() - && (helper.count() >= MIGRATION_FLUSH_OPS || helper.dataSize() >= MIGRATION_FLUSH_BYTES)) { - helper.flush(); - } - } - - @Override - protected ISyncContext.IRefresher newRefresher() { - return syncContext.refresher(); - } -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngineConfigurator.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngineConfigurator.java deleted file mode 100644 index b26e104aa..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBWALableKVEngineConfigurator.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.basekv.localengine.rocksdb; - -import org.apache.bifromq.basekv.localengine.IWALableKVEngineConfigurator; -import lombok.Builder; -import lombok.Getter; -import lombok.Setter; -import lombok.experimental.Accessors; -import lombok.experimental.SuperBuilder; -import org.rocksdb.DBOptions; - -@Accessors(chain = true, fluent = true) -@Getter -@Setter -@SuperBuilder(toBuilder = true) -public final class RocksDBWALableKVEngineConfigurator - extends RocksDBKVEngineConfigurator - implements IWALableKVEngineConfigurator { - @Builder.Default - private boolean asyncWALFlush = false; - @Builder.Default - private boolean fsyncWAL = false; - - @Override - public DBOptions dbOptions() { - DBOptions options = super.dbOptions(); - options.setManualWalFlush(asyncWALFlush); - return options; - } -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/metrics/RocksDBKVSpaceMetric.java b/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/metrics/RocksDBKVSpaceMetric.java deleted file mode 100644 index c0e8198d9..000000000 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/metrics/RocksDBKVSpaceMetric.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.basekv.localengine.rocksdb.metrics; - -import org.apache.bifromq.basekv.localengine.metrics.IKVSpaceMetric; -import io.micrometer.core.instrument.Meter; - -/** - * RocksDB specific metrics for KVSpace. - */ -public enum RocksDBKVSpaceMetric implements IKVSpaceMetric { - BlockCache("basekv.le.rocksdb.mem.blockcache", Meter.Type.GAUGE), - TableReader("basekv.le.rocksdb.mem.tablereader", Meter.Type.GAUGE), - MemTable("basekv.le.rocksdb.mem.memtable", Meter.Type.GAUGE), - PinnedMem("basekv.le.rocksdb.mem.pinned", Meter.Type.GAUGE), - CheckpointTimer("basekv.le.rocksdb.checkpoint.time", Meter.Type.TIMER), - CompactionCounter("basekv.le.rocksdb.compaction.count", Meter.Type.COUNTER), - CompactionTimer("basekv.le.rocksdb.compaction.time", Meter.Type.TIMER), - TotalKeysGauge("basekv.le.rocksdb.compaction.keys", Meter.Type.GAUGE), - TotalTombstoneKeysGauge("basekv.le.rocksdb.compaction.delkeys", Meter.Type.GAUGE), - TotalTombstoneRangesGauge("basekv.le.rocksdb.compaction.delranges", Meter.Type.GAUGE), - FlushTimer("basekv.le.rocksdb.flush.time", Meter.Type.TIMER); - - private final String metricName; - private final Meter.Type meterType; - - RocksDBKVSpaceMetric(String metricName, Meter.Type meterType) { - this.metricName = metricName; - this.meterType = meterType; - } - - @Override - public String metricName() { - return metricName; - } - - @Override - public 
Meter.Type meterType() { - return meterType; - } -} diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/AbstractKVEngineTest.java b/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/AbstractKVEngineTest.java deleted file mode 100644 index f1bcb7f53..000000000 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/AbstractKVEngineTest.java +++ /dev/null @@ -1,377 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.localengine; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; - -import org.apache.bifromq.basekv.proto.Boundary; -import com.google.protobuf.ByteString; -import io.reactivex.rxjava3.disposables.Disposable; -import java.lang.reflect.Method; -import org.testng.annotations.Test; - -public abstract class AbstractKVEngineTest extends MockableTest { - protected IKVEngine engine; - - @Override - protected void doSetup(Method method) { - beforeStart(); - engine = newEngine(); - engine.start(); - } - - protected void beforeStart() { - - } - - @Override - protected void doTeardown(Method method) { - engine.stop(); - afterStop(); - } - - protected void afterStop() { - - } - - protected abstract IKVEngine newEngine(); - - @Test - public void createIfMissing() { - String rangeId = "test_range1"; - IKVSpace keyRange = engine.createIfMissing(rangeId); - IKVSpace keyRange1 = engine.createIfMissing(rangeId); - assertEquals(keyRange1, keyRange); - } - - @Test - public void size() { - String rangeId = "test_range1"; - String rangeId1 = "test_range2"; - ByteString key = ByteString.copyFromUtf8("key"); - ByteString value = ByteString.copyFromUtf8("value"); - IKVSpace keyRange = engine.createIfMissing(rangeId); - assertEquals(keyRange.size(), 0); - keyRange.toWriter().put(key, value).done(); - assertTrue(keyRange.size() > 0); - - IKVSpace keyRange1 = engine.createIfMissing(rangeId1); - assertEquals(keyRange1.size(), 0); - } - - @Test - public void metadata() { - String rangeId = "test_range1"; - ByteString key = ByteString.copyFromUtf8("key"); - ByteString value = ByteString.copyFromUtf8("value"); - IKVSpace keyRange = engine.createIfMissing(rangeId); - keyRange.toWriter().metadata(key, value).done(); - assertTrue(keyRange.metadata(key).isPresent()); - assertEquals(keyRange.metadata(key).get(), value); - } - - @Test - public void describe() { - String 
rangeId = "test_range1"; - ByteString key = ByteString.copyFromUtf8("key"); - ByteString value = ByteString.copyFromUtf8("value"); - IKVSpace keyRange = engine.createIfMissing(rangeId); - KVSpaceDescriptor descriptor = keyRange.describe(); - assertEquals(descriptor.id(), rangeId); - assertEquals(descriptor.metrics().get("size"), 0); - - keyRange.toWriter().put(key, value).metadata(key, value).done(); - descriptor = keyRange.describe(); - assertTrue(descriptor.metrics().get("size") > 0); - } - - @Test - public void kvSpaceDestroy() { - String rangeId = "test_range1"; - IKVSpace range = engine.createIfMissing(rangeId); - assertTrue(engine.spaces().containsKey(rangeId)); - Disposable disposable = range.metadata().subscribe(); - range.destroy(); - assertTrue(disposable.isDisposed()); - assertTrue(engine.spaces().isEmpty()); - assertFalse(engine.spaces().containsKey(rangeId)); - } - - @Test - public void kvSpaceDestroyAndCreate() { - String rangeId = "test_range1"; - ByteString key = ByteString.copyFromUtf8("key"); - ByteString value = ByteString.copyFromUtf8("value"); - IKVSpace range = engine.createIfMissing(rangeId); - range.toWriter().put(key, value).done(); - assertTrue(range.exist(key)); - range.destroy(); - - range = engine.createIfMissing(rangeId); - assertFalse(range.exist(key)); - range.toWriter().put(key, value).done(); - assertTrue(range.exist(key)); - } - - - @Test - public void exist() { - String rangeId = "test_range1"; - ByteString key = ByteString.copyFromUtf8("key"); - ByteString value = ByteString.copyFromUtf8("value"); - IKVSpace keyRange = engine.createIfMissing(rangeId); - assertFalse(keyRange.exist(key)); - - IKVSpaceWriter rangeWriter = keyRange.toWriter().put(key, value); - assertFalse(keyRange.exist(key)); - - rangeWriter.done(); - assertTrue(keyRange.exist(key)); - } - - @Test - public void get() { - String rangeId = "test_range1"; - ByteString key = ByteString.copyFromUtf8("key"); - ByteString value = ByteString.copyFromUtf8("value"); - 
IKVSpace keyRange = engine.createIfMissing(rangeId); - assertFalse(keyRange.get(key).isPresent()); - - IKVSpaceWriter rangeWriter = keyRange.toWriter().put(key, value); - assertFalse(keyRange.get(key).isPresent()); - - rangeWriter.done(); - assertTrue(keyRange.get(key).isPresent()); - assertEquals(keyRange.get(key).get(), value); - } - - @Test - public void iterator() { - String rangeId = "test_range1"; - ByteString key = ByteString.copyFromUtf8("key"); - ByteString value = ByteString.copyFromUtf8("value"); - IKVSpace keyRange = engine.createIfMissing(rangeId); - - try (IKVSpaceIterator keyRangeIterator = keyRange.newIterator()) { - keyRangeIterator.seekToFirst(); - assertFalse(keyRangeIterator.isValid()); - keyRange.toWriter().put(key, value).done(); - - keyRangeIterator.seekToFirst(); - assertFalse(keyRangeIterator.isValid()); - keyRangeIterator.refresh(); - - keyRangeIterator.seekToFirst(); - assertTrue(keyRangeIterator.isValid()); - assertEquals(keyRangeIterator.key(), key); - assertEquals(keyRangeIterator.value(), value); - keyRangeIterator.next(); - assertFalse(keyRangeIterator.isValid()); - - keyRangeIterator.seekToLast(); - assertTrue(keyRangeIterator.isValid()); - assertEquals(keyRangeIterator.key(), key); - assertEquals(keyRangeIterator.value(), value); - keyRangeIterator.next(); - assertFalse(keyRangeIterator.isValid()); - - keyRangeIterator.seekForPrev(key); - assertTrue(keyRangeIterator.isValid()); - assertEquals(keyRangeIterator.key(), key); - assertEquals(keyRangeIterator.value(), value); - keyRangeIterator.next(); - assertFalse(keyRangeIterator.isValid()); - } - } - - @Test - public void iterateSubBoundary() { - String rangeId = "test_range1"; - ByteString key = ByteString.copyFromUtf8("key"); - ByteString value = ByteString.copyFromUtf8("value"); - IKVSpace keyRange = engine.createIfMissing(rangeId); - - try (IKVSpaceIterator keyRangeIterator = keyRange.newIterator(Boundary.newBuilder() - .setStartKey(key) - .build())) { - 
keyRangeIterator.seekToFirst(); - assertFalse(keyRangeIterator.isValid()); - keyRange.toWriter().put(key, value).done(); - - keyRangeIterator.seekToFirst(); - assertFalse(keyRangeIterator.isValid()); - keyRangeIterator.refresh(); - - keyRangeIterator.seekToFirst(); - assertTrue(keyRangeIterator.isValid()); - assertEquals(keyRangeIterator.key(), key); - assertEquals(keyRangeIterator.value(), value); - keyRangeIterator.next(); - assertFalse(keyRangeIterator.isValid()); - - keyRangeIterator.seekToLast(); - assertTrue(keyRangeIterator.isValid()); - assertEquals(keyRangeIterator.key(), key); - assertEquals(keyRangeIterator.value(), value); - keyRangeIterator.next(); - assertFalse(keyRangeIterator.isValid()); - - keyRangeIterator.seekForPrev(key); - assertTrue(keyRangeIterator.isValid()); - assertEquals(keyRangeIterator.key(), key); - assertEquals(keyRangeIterator.value(), value); - keyRangeIterator.next(); - assertFalse(keyRangeIterator.isValid()); - } - try (IKVSpaceIterator keyRangeIterator = keyRange.newIterator(Boundary.newBuilder() - .setStartKey(ByteString.copyFromUtf8("0")) - .setEndKey(ByteString.copyFromUtf8("9")) - .build())) { - keyRangeIterator.seekToFirst(); - assertFalse(keyRangeIterator.isValid()); - - keyRange.toWriter().put(key, value).done(); - - keyRangeIterator.refresh(); - - keyRangeIterator.seekToFirst(); - assertFalse(keyRangeIterator.isValid()); - } - } - - @Test - public void writer() { - String rangeId = "test_range1"; - ByteString key = ByteString.copyFromUtf8("key"); - ByteString value = ByteString.copyFromUtf8("value"); - IKVSpace keyRange = engine.createIfMissing(rangeId); - keyRange.toWriter() - .put(key, value) - .delete(key) - .metadata(key, value) - .done(); - assertFalse(keyRange.exist(key)); - - IKVSpaceWriter rangeWriter = keyRange.toWriter(); - assertEquals(rangeWriter.metadata(key).get(), value); - rangeWriter.insert(key, value).done(); - assertTrue(keyRange.exist(key)); - - keyRange.toWriter().clear().done(); - 
assertFalse(keyRange.exist(key)); - } - - @Test - public void clearSubBoundary() { - String rangeId = "test_range1"; - ByteString key = ByteString.copyFromUtf8("key"); - ByteString value = ByteString.copyFromUtf8("value"); - IKVSpace keyRange = engine.createIfMissing(rangeId); - keyRange.toWriter().put(key, value).done(); - - keyRange.toWriter() - .clear(Boundary.newBuilder() - .setStartKey(ByteString.copyFromUtf8("0")) - .setEndKey(ByteString.copyFromUtf8("9")) - .build()) - .done(); - assertTrue(keyRange.exist(key)); - } - - @Test - public void migrateTo() { - String leftRangeId = "test_range1"; - String rightRangeId = "test_range2"; - ByteString key1 = ByteString.copyFromUtf8("1"); - ByteString value1 = ByteString.copyFromUtf8("1"); - ByteString key2 = ByteString.copyFromUtf8("6"); - ByteString value2 = ByteString.copyFromUtf8("6"); - ByteString splitKey = ByteString.copyFromUtf8("5"); - - ByteString metaKey = ByteString.copyFromUtf8("metaKey"); - ByteString metaVal = ByteString.copyFromUtf8("metaVal"); - - - IKVSpace leftRange = engine.createIfMissing(leftRangeId); - leftRange.toWriter() - .put(key1, value1) - .put(key2, value2) - .done(); - IKVSpaceWriter leftSpaceWriter = leftRange.toWriter(); - IKVSpaceMetadataWriter rightSpaceMetadataWriter = leftSpaceWriter - .migrateTo(rightRangeId, Boundary.newBuilder().setStartKey(splitKey).build()).metadata(metaKey, metaVal); - leftSpaceWriter.done(); - rightSpaceMetadataWriter.done(); - - IKVSpace rightRange = engine.createIfMissing(rightRangeId); - - assertFalse(leftRange.metadata(metaKey).isPresent()); - assertTrue(rightRange.metadata(metaKey).isPresent()); - assertEquals(rightRange.metadata(metaKey).get(), metaVal); - assertFalse(leftRange.exist(key2)); - assertTrue(rightRange.exist(key2)); - } - - @Test - public void migrateFrom() { - String leftRangeId = "test_range1"; - String rightRangeId = "test_range2"; - ByteString key1 = ByteString.copyFromUtf8("1"); - ByteString value1 = ByteString.copyFromUtf8("1"); - 
ByteString key2 = ByteString.copyFromUtf8("6"); - ByteString value2 = ByteString.copyFromUtf8("6"); - ByteString splitKey = ByteString.copyFromUtf8("5"); - - ByteString metaKey = ByteString.copyFromUtf8("metaKey"); - ByteString metaVal = ByteString.copyFromUtf8("metaVal"); - - - IKVSpace leftRange = engine.createIfMissing(leftRangeId); - leftRange.toWriter() - .put(key1, value1) - .done(); - assertFalse(leftRange.exist(key2)); - IKVSpace rightRange = engine.createIfMissing(rightRangeId); - rightRange.toWriter() - .put(key2, value2) - .done(); - assertTrue(rightRange.exist(key2)); - - IKVSpaceWriter leftSpaceWriter = leftRange.toWriter(); - IKVSpaceMetadataWriter rightSpaceMetadataWriter = leftSpaceWriter - .migrateFrom(rightRangeId, Boundary.newBuilder().setStartKey(splitKey).build()) - .metadata(metaKey, metaVal); - leftSpaceWriter - .metadata(metaKey, metaVal) - .done(); - rightSpaceMetadataWriter.done(); - - assertTrue(leftRange.metadata(metaKey).isPresent()); - assertTrue(rightRange.metadata(metaKey).isPresent()); - - assertEquals(rightRange.metadata(metaKey).get(), metaVal); - assertEquals(rightRange.metadata(metaKey).get(), metaVal); - - assertTrue(leftRange.exist(key2)); - assertFalse(rightRange.exist(key2)); - } -} diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMetersTest.java b/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMetersTest.java deleted file mode 100644 index 14126ae1f..000000000 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/metrics/KVSpaceMetersTest.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.basekv.localengine.metrics; - -import static org.awaitility.Awaitility.await; -import static org.testng.Assert.assertFalse; - -import org.apache.bifromq.basekv.localengine.MockableTest; -import io.micrometer.core.instrument.DistributionSummary; -import io.micrometer.core.instrument.Gauge; -import io.micrometer.core.instrument.Metrics; -import io.micrometer.core.instrument.Tags; -import java.lang.ref.WeakReference; -import java.util.concurrent.atomic.AtomicInteger; -import lombok.extern.slf4j.Slf4j; -import org.testng.annotations.Test; - -@Slf4j -public class KVSpaceMetersTest extends MockableTest { - @Test - public void removeGaugeWhenNoRef() { - AtomicInteger gaugeCounter = new AtomicInteger(); - Gauge gauge = KVSpaceMeters.getGauge("testSpace", GeneralKVSpaceMetric.CheckpointNumGauge, gaugeCounter::get, - Tags.of(new String[0])); - String gaugeName = gauge.getId().getName(); - WeakReference weakRef = new WeakReference<>(gauge); - assertFalse(Metrics.globalRegistry.find(gaugeName).gauges().isEmpty()); - gauge = null; - await().forever().until(() -> { - System.gc(); - return weakRef.get() == null && Metrics.globalRegistry.find(gaugeName).gauges().stream() - .noneMatch(g -> g.getId().getName().equals("testSpace")); - }); - } - - - @Test - public void removeSummaryWhenNoRef() { - DistributionSummary summary = KVSpaceMeters.getSummary("testSpace", 
GeneralKVSpaceMetric.ReadBytesDistribution, - Tags.of(new String[0])); - String timerName = summary.getId().getName(); - WeakReference weakRef = new WeakReference<>(summary); - assertFalse(Metrics.globalRegistry.find(timerName).summaries().isEmpty()); - summary = null; - await().forever().until(() -> { - System.gc(); - return weakRef.get() == null && Metrics.globalRegistry.find(timerName).counters().isEmpty(); - }); - - } -} diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/KVSpaceSnapshotTest.java b/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/KVSpaceSnapshotTest.java deleted file mode 100644 index 763b80080..000000000 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/KVSpaceSnapshotTest.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.localengine.rocksdb; - -import static org.awaitility.Awaitility.await; - -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceOpMeters; -import io.micrometer.core.instrument.Tags; -import org.mockito.Mockito; -import org.rocksdb.Snapshot; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.testng.annotations.Test; - -public class KVSpaceSnapshotTest extends AbstractRawRocksDBTest { - @Test - public void gc() { - Snapshot snapshot = db.getSnapshot(); - Logger logger = LoggerFactory.getLogger("testLogger"); - RocksDBKVSpaceSnapshot spaceSnapshot = - new RocksDBKVSpaceSnapshot("testSpace", snapshot, cfHandle, db, - new KVSpaceOpMeters("testSpace", Tags.of("tag", "value")), logger); - spaceSnapshot = null; - await().forever().until(() -> { - System.gc(); - try { - Mockito.verify(db, Mockito.times(1)).releaseSnapshot(snapshot); - return true; - } catch (Throwable e) { - return false; - } - }); - } -} diff --git a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/metrics/KVSpaceMetersTest.java b/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/metrics/KVSpaceMetersTest.java deleted file mode 100644 index 853cd0686..000000000 --- a/base-kv/base-kv-local-engine/src/test/java/org/apache/bifromq/basekv/localengine/rocksdb/metrics/KVSpaceMetersTest.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.basekv.localengine.rocksdb.metrics; - -import static org.awaitility.Awaitility.await; -import static org.testng.Assert.assertFalse; - -import org.apache.bifromq.basekv.localengine.MockableTest; -import org.apache.bifromq.basekv.localengine.metrics.KVSpaceMeters; -import io.micrometer.core.instrument.Counter; -import io.micrometer.core.instrument.Gauge; -import io.micrometer.core.instrument.Metrics; -import io.micrometer.core.instrument.Tags; -import io.micrometer.core.instrument.Timer; -import java.lang.ref.WeakReference; -import java.util.concurrent.atomic.AtomicInteger; -import lombok.extern.slf4j.Slf4j; -import org.testng.annotations.Test; - -@Slf4j -public class KVSpaceMetersTest extends MockableTest { - @Test - public void removeGaugeWhenNoRef() { - AtomicInteger gaugeCounter = new AtomicInteger(); - Gauge gauge = KVSpaceMeters.getGauge("testSpace", RocksDBKVSpaceMetric.TotalTombstoneRangesGauge, - gaugeCounter::get, - Tags.of(new String[0])); - String gaugeName = gauge.getId().getName(); - WeakReference weakRef = new WeakReference<>(gauge); - assertFalse(Metrics.globalRegistry.find(gaugeName).gauges().isEmpty()); - gauge = null; - await().forever().until(() -> { - System.gc(); - return weakRef.get() == null && Metrics.globalRegistry.find(gaugeName).gauges().isEmpty(); - }); - } - - @Test - public void removeTimerWhenNoRef() { - Timer timer = KVSpaceMeters.getTimer("testSpace", RocksDBKVSpaceMetric.FlushTimer, Tags.of(new String[0])); - String timerName = timer.getId().getName(); - WeakReference weakRef = new 
WeakReference<>(timer); - assertFalse(Metrics.globalRegistry.find(timerName).timers().isEmpty()); - timer = null; - await().forever().until(() -> { - System.gc(); - return weakRef.get() == null && Metrics.globalRegistry.find(timerName).timers().isEmpty(); - }); - } - - @Test - public void removeCounterWhenNoRef() { - Counter counter = - KVSpaceMeters.getCounter("testSpace", RocksDBKVSpaceMetric.CompactionCounter, Tags.of(new String[0])); - String timerName = counter.getId().getName(); - WeakReference weakRef = new WeakReference<>(counter); - assertFalse(Metrics.globalRegistry.find(timerName).counters().isEmpty()); - counter = null; - await().forever().until(() -> { - System.gc(); - return weakRef.get() == null && Metrics.globalRegistry.find(timerName).counters().isEmpty(); - }); - } -} diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/BasicStateStoreTest.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/BasicStateStoreTest.java index 176fcf585..8885eb6ae 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/BasicStateStoreTest.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/BasicStateStoreTest.java @@ -27,16 +27,15 @@ import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.raft.proto.LogEntry; -import org.apache.bifromq.basekv.raft.proto.Snapshot; -import org.apache.bifromq.basekv.raft.proto.Voting; import com.google.protobuf.ByteString; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.raft.proto.LogEntry; +import org.apache.bifromq.basekv.raft.proto.Snapshot; +import org.apache.bifromq.basekv.raft.proto.Voting; import 
org.testng.Assert; import org.testng.annotations.Test; @@ -247,11 +246,12 @@ public void testTruncateAndAppend() { assertNotEquals(stateStorage.latestClusterConfig(), updatedClusterConfig); assertEquals(stateStorage.firstIndex(), 1); assertEquals(stateStorage.lastIndex(), 8); - Iterator entries5To8 = stateStorage.entries(5, 9, -1); - for (int i = 0; entries5To8.hasNext(); i++) { - LogEntry entry = entries5To8.next(); - assertEquals(entry.getTerm(), 2); - assertEquals(entry.getIndex(), i + 5); + try (ILogEntryIterator entries5To8 = stateStorage.entries(5, 9, -1)) { + for (int i = 0; entries5To8.hasNext(); i++) { + LogEntry entry = entries5To8.next(); + assertEquals(entry.getTerm(), 2); + assertEquals(entry.getIndex(), i + 5); + } } } @@ -564,8 +564,9 @@ public void testApplyDifferentSnapshot() { @Test public void testFetchEntries() { IRaftStateStore stateStorage = setupStateStorage(); - Iterator entries = stateStorage.entries(stateStorage.firstIndex(), stateStorage.lastIndex(), -1); - assertFalse(entries.hasNext()); + try (ILogEntryIterator entries = stateStorage.entries(stateStorage.firstIndex(), stateStorage.lastIndex(), -1)) { + assertFalse(entries.hasNext()); + } int count = 10; while (count-- > 0) { LogEntry entry = LogEntry.newBuilder() @@ -576,17 +577,20 @@ public void testFetchEntries() { stateStorage.append(singletonList(entry), true); } AtomicInteger counter = new AtomicInteger(0); - stateStorage.entries(stateStorage.firstIndex(), stateStorage.lastIndex() + 1, -1) - .forEachRemaining(e -> counter.incrementAndGet()); + try (ILogEntryIterator it = stateStorage.entries(stateStorage.firstIndex(), stateStorage.lastIndex() + 1, -1)) { + it.forEachRemaining(e -> counter.incrementAndGet()); + } assertEquals(counter.get(), 10); counter.set(0); - stateStorage.entries(stateStorage.firstIndex(), stateStorage.lastIndex() + 1, 2) - .forEachRemaining(e -> counter.incrementAndGet()); + try (ILogEntryIterator it = stateStorage.entries(stateStorage.firstIndex(), 
stateStorage.lastIndex() + 1, 2)) { + it.forEachRemaining(e -> counter.incrementAndGet()); + } assertEquals(counter.get(), 1); counter.set(0); - stateStorage.entries(stateStorage.firstIndex(), stateStorage.lastIndex() + 1, 10) - .forEachRemaining(e -> counter.incrementAndGet()); + try (ILogEntryIterator it = stateStorage.entries(stateStorage.firstIndex(), stateStorage.lastIndex() + 1, 10)) { + it.forEachRemaining(e -> counter.incrementAndGet()); + } assertEquals(counter.get(), 2); } diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/ILogEntryIterator.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/ILogEntryIterator.java new file mode 100644 index 000000000..aaacac666 --- /dev/null +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/ILogEntryIterator.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.raft; + +import java.util.Iterator; +import org.apache.bifromq.basekv.raft.proto.LogEntry; + +/** + * A closeable iterator for retrieved log entries. + */ +public interface ILogEntryIterator extends Iterator, AutoCloseable { + /** + * Caller must call this method after use. 
+ */ + void close(); +} diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/IRaftNode.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/IRaftNode.java index 34e87954e..8e231e6d4 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/IRaftNode.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/IRaftNode.java @@ -14,22 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.raft; -import org.apache.bifromq.basekv.raft.event.RaftEvent; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.raft.proto.LogEntry; -import org.apache.bifromq.basekv.raft.proto.RaftMessage; -import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import com.google.protobuf.ByteString; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.CompletableFuture; +import org.apache.bifromq.basekv.raft.event.RaftEvent; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.raft.proto.RaftMessage; +import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; /** * Raft Node interface, which is used to interact with raft node. @@ -155,9 +153,9 @@ public interface IRaftNode { * * @param fromIndex the start index of the log entry * @param maxSize the max size of the log entries - * @return the future of the log entries + * @return the future of the log entries iterator */ - CompletableFuture> retrieveCommitted(long fromIndex, long maxSize); + CompletableFuture retrieveCommitted(long fromIndex, long maxSize); /** * Start the Raft Node with necessary callbacks, this callbacks will be executed in raft's thread. 
diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/IRaftStateStore.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/IRaftStateStore.java index 60b6697a9..e9054c182 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/IRaftStateStore.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/IRaftStateStore.java @@ -14,18 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.raft; +import java.util.List; +import java.util.Optional; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.LogEntry; import org.apache.bifromq.basekv.raft.proto.Snapshot; import org.apache.bifromq.basekv.raft.proto.Voting; -import java.util.Iterator; -import java.util.List; -import java.util.Optional; /** * Local Storage interface used by RAFT state machine to work with its local persistent state. The implementation MUST @@ -140,14 +139,14 @@ public interface IRaftStateStore { /** * Get an iterator for retrieving log entries between lo and hi(exclusively) and aggregated size no greater than - * maxSize. + * maxSize. The iterator must be closed explicitly. 
* * @param lo the start index of the log entry * @param hi the end index of the log entry * @param maxSize the max size of the log entries - * @return the iterator of the log entries + * @return the log iterator of the log entries */ - Iterator entries(long lo, long hi, long maxSize); + ILogEntryIterator entries(long lo, long hi, long maxSize); /** * Append log entries after specified index, if flush is true, registered StableListener must be called immediately diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/InMemoryStateStore.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/InMemoryStateStore.java index 61e6d27f2..8a2786076 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/InMemoryStateStore.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/InMemoryStateStore.java @@ -19,10 +19,6 @@ package org.apache.bifromq.basekv.raft; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.raft.proto.LogEntry; -import org.apache.bifromq.basekv.raft.proto.Snapshot; -import org.apache.bifromq.basekv.raft.proto.Voting; import com.google.protobuf.ByteString; import java.util.ArrayList; import java.util.Iterator; @@ -35,6 +31,10 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.raft.proto.LogEntry; +import org.apache.bifromq.basekv.raft.proto.Snapshot; +import org.apache.bifromq.basekv.raft.proto.Voting; /** * A simple in-memory state store for testing purpose. 
@@ -176,7 +176,7 @@ public Optional entryAt(long index) { } @Override - public Iterator entries(long lo, long hi, long maxSize) { + public ILogEntryIterator entries(long lo, long hi, long maxSize) { if (lo < firstIndex()) { throw new IndexOutOfBoundsException("lo must not be less than firstIndex"); } @@ -204,7 +204,24 @@ public Iterator entries(long lo, long hi, long maxSize) { } lo++; } - return ret.iterator(); + return new ILogEntryIterator() { + private final Iterator delegate = ret.iterator(); + + @Override + public void close() { + + } + + @Override + public boolean hasNext() { + return delegate.hasNext(); + } + + @Override + public LogEntry next() { + return delegate.next(); + } + }; } @Override diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/MetricMonitoredStateStore.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/MetricMonitoredStateStore.java index 475b034b8..7bbc1a29a 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/MetricMonitoredStateStore.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/MetricMonitoredStateStore.java @@ -14,21 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.raft; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.raft.proto.LogEntry; -import org.apache.bifromq.basekv.raft.proto.Snapshot; -import org.apache.bifromq.basekv.raft.proto.Voting; import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.Tags; import io.micrometer.core.instrument.Timer; -import java.util.Iterator; import java.util.List; import java.util.Optional; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.raft.proto.LogEntry; +import org.apache.bifromq.basekv.raft.proto.Snapshot; +import org.apache.bifromq.basekv.raft.proto.Voting; class MetricMonitoredStateStore implements IRaftStateStore { private final IRaftStateStore delegate; @@ -95,7 +94,7 @@ public Optional entryAt(long index) { } @Override - public Iterator entries(long lo, long hi, long maxSize) { + public ILogEntryIterator entries(long lo, long hi, long maxSize) { return metricManager.entriesTimer.record(() -> delegate.entries(lo, hi, maxSize)); } diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftConfigChanger.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftConfigChanger.java index 92e64f859..3a10a4119 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftConfigChanger.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftConfigChanger.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.raft; @@ -25,14 +25,14 @@ import static org.apache.bifromq.basekv.raft.RaftConfigChanger.State.TargetConfigCommitting; import static org.apache.bifromq.basekv.raft.RaftConfigChanger.State.Waiting; -import org.apache.bifromq.basekv.raft.exception.ClusterConfigChangeException; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.raft.proto.LogEntry; -import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; import java.util.Collections; import java.util.HashSet; import java.util.Set; import java.util.concurrent.CompletableFuture; +import org.apache.bifromq.basekv.raft.exception.ClusterConfigChangeException; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.raft.proto.LogEntry; +import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; import org.slf4j.Logger; /** @@ -51,7 +51,7 @@ * IMPLEMENTATION NOTES: * * "catching up" is formulated as (lastIndex - nextIndex) / nextIndexIncreasingRate <= electionTimeout * * only one cluster config process could run at the same time, so leader should check if the pending is null - * and if the latest cluster config is not in Joint-Consensus Mode and it is committed. + * and if the latest cluster config is not in Joint-Consensus Mode, and it is committed. * * if any new server is not catching up in catchingUpTimeoutTick, the process will be aborted by reporting * slow learner exception. * * if the process aborted in #1, remove no used peer replicator tracked previously. 
@@ -75,6 +75,7 @@ class RaftConfigChanger { private long catchingUpElapsedTick = 0; private long jointConfigIndex = 0; private long targetConfigIndex = 0; + private ClusterConfig fallbackConfig; private ClusterConfig jointConfig; private ClusterConfig targetConfig; @@ -114,6 +115,7 @@ public void submit(String correlateId, allVoters.addAll(nextVoters); Set allLearners = new HashSet<>(latestConfig.getNextLearnersList()); allLearners.addAll(nextLearners); + fallbackConfig = latestConfig.toBuilder().setCorrelateId(correlateId).build(); jointConfig = ClusterConfig.newBuilder() .setCorrelateId(correlateId) .addAllVoters(latestConfig.getVotersList()) @@ -154,7 +156,8 @@ public boolean tick(long currentTerm) { // accumulated if (catchingUpElapsedTick >= config.getInstallSnapshotTimeoutTick() + 10L * config.getElectionTimeoutTick()) { - logger.debug("Catching up timeout, give up changing config"); + logger.debug("Catching up timeout, revert to previous config: correlateId={}", + fallbackConfig.getCorrelateId()); // report exception, unregister replicators and transit to Waiting state Set peersToStopTracking = new HashSet<>(jointConfig.getNextVotersList()); @@ -163,7 +166,17 @@ public boolean tick(long currentTerm) { peersToStopTracking.removeIf(jointConfig.getLearnersList()::contains); peerLogTracker.stopTracking(peersToStopTracking); - state = Waiting; + targetConfigIndex = stateStorage.lastIndex() + 1; + LogEntry fallbackConfigEntry = LogEntry.newBuilder() + .setTerm(currentTerm) + .setIndex(targetConfigIndex) + .setConfig(fallbackConfig) + .build(); + // flush the log entry immediately + stateStorage.append(Collections.singletonList(fallbackConfigEntry), true); + // update self progress + peerLogTracker.replicateBy(stateStorage.local(), stateStorage.lastIndex()); + state = State.FallbackConfigCommitting; onDone.completeExceptionally(ClusterConfigChangeException.slowLearner()); return true; } else { @@ -210,7 +223,7 @@ public boolean tick(long currentTerm) { /** * Leader 
must call this method to report current commitIndex and currentTerm, The return bool indicates if there is - * a state change, Leader must examine the status afterwards and take corresponding actions. + * a state change, Leader must examine the status afterward and take corresponding actions. * * @param commitIndex committed index * @param currentTerm current term @@ -218,8 +231,8 @@ public boolean tick(long currentTerm) { */ public boolean commitTo(long commitIndex, long currentTerm) { assert state != Abort; - switch (state) { - case JointConfigCommitting: + return switch (state) { + case JointConfigCommitting -> { if (commitIndex >= jointConfigIndex) { targetConfigIndex = stateStorage.lastIndex() + 1; assert commitIndex < targetConfigIndex; @@ -236,21 +249,28 @@ public boolean commitTo(long commitIndex, long currentTerm) { state = TargetConfigCommitting; logger.debug("Joint config committed, append target config as log entry[index={}]", targetConfigIndex); - return true; + yield true; } - return false; - case TargetConfigCommitting: + yield false; + } + case TargetConfigCommitting -> { if (commitIndex >= targetConfigIndex) { state = Waiting; logger.debug("Target config committed at index[{}]", targetConfigIndex); - return true; + yield true; } - return false; - case Waiting: - case CatchingUp: - default: - return false; - } + yield false; + } + case FallbackConfigCommitting -> { + if (commitIndex >= targetConfigIndex) { + state = Waiting; + logger.debug("Fallback config committed at index[{}]", targetConfigIndex); + yield true; + } + yield false; + } + default -> false; + }; } /** @@ -274,7 +294,7 @@ protected Set remotePeers() { all.addAll(clusterConfig.getLearnersList()); all.remove(stateStorage.local()); } - case CatchingUp, JointConfigCommitting, TargetConfigCommitting -> { + case CatchingUp, JointConfigCommitting, TargetConfigCommitting, FallbackConfigCommitting -> { all.addAll(jointConfig.getVotersList()); all.addAll(jointConfig.getLearnersList()); 
all.addAll(jointConfig.getNextVotersList()); @@ -299,7 +319,7 @@ public void abort(ClusterConfigChangeException e) { assert state != Abort; switch (state) { case Waiting -> state = Abort; - case CatchingUp, TargetConfigCommitting, JointConfigCommitting -> { + case CatchingUp, TargetConfigCommitting, JointConfigCommitting, FallbackConfigCommitting -> { logger.debug("Abort on-going cluster config change"); state = Abort; onDone.completeExceptionally(e); @@ -370,6 +390,7 @@ enum State { Waiting, // changer could accept submission of new config change CatchingUp, // catching up new voters in new submitted config JointConfigCommitting, // joint config is appended as log entry and waiting to be committed - TargetConfigCommitting // target config is appended as log entry and waiting to be committed + TargetConfigCommitting, // target config is appended as log entry and waiting to be committed + FallbackConfigCommitting // fallback config is appended as log entry and waiting to be committed } } diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNode.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNode.java index ba619c67c..39d0e53b8 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNode.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNode.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.raft; @@ -27,7 +27,6 @@ import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics; import java.util.EnumMap; import java.util.HashMap; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -45,7 +44,6 @@ import org.apache.bifromq.basekv.raft.event.RaftEventType; import org.apache.bifromq.basekv.raft.exception.InternalError; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.raft.proto.LogEntry; import org.apache.bifromq.basekv.raft.proto.RaftMessage; import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.apache.bifromq.logger.MDCLogger; @@ -213,7 +211,7 @@ public CompletableFuture changeClusterConfig(String correlateId, } @Override - public CompletableFuture> retrieveCommitted(long fromIndex, long maxSize) { + public CompletableFuture retrieveCommitted(long fromIndex, long maxSize) { return submit(onDone -> stateRef.get().retrieveCommitted(fromIndex, maxSize, sampleLatency(onDone, metricMgr.retrieveEntriesTimer))); } diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeState.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeState.java index 5672b8b74..c6220f486 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeState.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeState.java @@ -163,7 +163,7 @@ final ByteString latestSnapshot() { } final void retrieveCommitted(long fromIndex, long maxSize, - CompletableFuture> onDone) { + CompletableFuture onDone) { if (fromIndex < stateStorage.firstIndex() || fromIndex > stateStorage.lastIndex()) { onDone.completeExceptionally(new IndexOutOfBoundsException("Index out of range")); } else { diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateLeader.java 
b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateLeader.java index f84ca6548..24381d1a7 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateLeader.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateLeader.java @@ -20,6 +20,7 @@ package org.apache.bifromq.basekv.raft; import static org.apache.bifromq.base.util.CompletableFutureUtil.unwrap; +import static org.apache.bifromq.basekv.raft.RaftConfigChanger.State.FallbackConfigCommitting; import static org.apache.bifromq.basekv.raft.RaftConfigChanger.State.JointConfigCommitting; import static org.apache.bifromq.basekv.raft.RaftConfigChanger.State.TargetConfigCommitting; @@ -28,7 +29,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -156,10 +156,11 @@ RaftNodeState tick() { peerLogTracker.tick(); if (configChanger.tick(currentTerm())) { // there is a state change after tick - if (configChanger.state() == JointConfigCommitting || configChanger.state() == TargetConfigCommitting) { - log.debug("{} cluster config is activated in current term", - configChanger.state() == JointConfigCommitting ? 
"Joint" : "Target"); + if (configChanger.state() == JointConfigCommitting + || configChanger.state() == TargetConfigCommitting + || configChanger.state() == FallbackConfigCommitting) { ClusterConfig clusterConfig = stateStorage.latestClusterConfig(); + log.debug("Activate config in current term: {}", clusterConfig); activityTracker.refresh(clusterConfig); electionElapsedTick = 0; // to prevent leader from quorum check failed prematurely if (leaderTransferTask != null) { @@ -557,33 +558,33 @@ private List prepareAppendEntriesForPeer(String peer, boolean force long preLogTerm = stateStorage.entryAt(preLogIndex) .map(LogEntry::getTerm).orElseGet(() -> stateStorage.latestSnapshot().getTerm()); if (!peerLogTracker.pauseReplicating(peer) && nextIndex <= stateStorage.lastIndex()) { - Iterator entries = stateStorage.entries(nextIndex, - stateStorage.lastIndex() + 1, config.getMaxSizePerAppend()); - AppendEntries.Builder builder = AppendEntries - .newBuilder() - .setLeaderId(stateStorage.local()) - .setPrevLogIndex(preLogIndex) - .setPrevLogTerm(preLogTerm) - .setCommitIndex(commitIndex) // tell follower the latest commit index - .setReadIndex(readIndex); - entries.forEachRemaining(builder::addEntries); - AppendEntries appendEntries = builder.build(); - messages.add(RaftMessage.newBuilder() - .setTerm(currentTerm()) - .setAppendEntries(appendEntries) - .build()); - - assert appendEntries.getEntriesCount() != 0; - log.trace("Prepare {} entries after " - + "entry[index:{},term:{}] for peer[{}] with readIndex[{}] when {}", - appendEntries.getEntriesCount(), - preLogIndex, - preLogTerm, - peer, - readIndex, - peerLogTracker.status(peer)); - peerLogTracker.replicateBy(peer, - appendEntries.getEntries(appendEntries.getEntriesCount() - 1).getIndex()); + try (ILogEntryIterator entries = stateStorage.entries(nextIndex, + stateStorage.lastIndex() + 1, config.getMaxSizePerAppend())) { + AppendEntries.Builder builder = AppendEntries + .newBuilder() + .setLeaderId(stateStorage.local()) 
+ .setPrevLogIndex(preLogIndex) + .setPrevLogTerm(preLogTerm) + .setCommitIndex(commitIndex) // tell follower the latest commit index + .setReadIndex(readIndex); + entries.forEachRemaining(builder::addEntries); + AppendEntries appendEntries = builder.build(); + messages.add(RaftMessage.newBuilder() + .setTerm(currentTerm()) + .setAppendEntries(appendEntries) + .build()); + assert appendEntries.getEntriesCount() != 0; + log.trace("Prepare {} entries after " + + "entry[index:{},term:{}] for peer[{}] with readIndex[{}] when {}", + appendEntries.getEntriesCount(), + preLogIndex, + preLogTerm, + peer, + readIndex, + peerLogTracker.status(peer)); + peerLogTracker.replicateBy(peer, + appendEntries.getEntries(appendEntries.getEntriesCount() - 1).getIndex()); + } break; } if (forceHeartbeat || peerLogTracker.needHeartbeat(peer)) { @@ -720,6 +721,9 @@ private RaftNodeState commit() { leaderTransferTask = null; } } + case FallbackConfigCommitting -> { + // do nothing when fallback config is committed + } default -> { // do nothing } diff --git a/base-kv/base-kv-raft/src/test/java/org/apache/bifromq/basekv/raft/RaftConfigChangerTest.java b/base-kv/base-kv-raft/src/test/java/org/apache/bifromq/basekv/raft/RaftConfigChangerTest.java index 43a538255..22e33cc22 100644 --- a/base-kv/base-kv-raft/src/test/java/org/apache/bifromq/basekv/raft/RaftConfigChangerTest.java +++ b/base-kv/base-kv-raft/src/test/java/org/apache/bifromq/basekv/raft/RaftConfigChangerTest.java @@ -14,26 +14,28 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.raft; import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.argThat; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.raft.exception.ClusterConfigChangeException; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.raft.proto.LogEntry; -import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.concurrent.CompletableFuture; +import org.apache.bifromq.basekv.raft.exception.ClusterConfigChangeException; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.raft.proto.LogEntry; +import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.mockito.MockitoAnnotations; @@ -57,6 +59,7 @@ public class RaftConfigChangerTest { IPeerLogReplicator logReplicator; private AutoCloseable closeable; + @BeforeMethod public void setup() { closeable = MockitoAnnotations.openMocks(this); @@ -149,15 +152,16 @@ public void testSubmitExceptionally() { @Test public void testCatchupTimeout() { + ClusterConfig latestConfig = ClusterConfig.newBuilder() + .addVoters("V1") + .addVoters("V2") + .addVoters("V3") + .addLearners("L1") + .build(); when(stateStorage.latestClusterConfig()) - .thenReturn(ClusterConfig.newBuilder() - .addVoters("V1") - .addVoters("V2") - .addVoters("V3") - .addLearners("L1") - .build()); + .thenReturn(latestConfig); CompletableFuture onDone = new CompletableFuture<>(); - configChanger.submit("cId", new HashSet<>() {{ + configChanger.submit("newId", new HashSet<>() {{ add("V1"); add("N2"); add("N3"); 
@@ -189,10 +193,22 @@ public void testCatchupTimeout() { add("L2"); }}); - Assert.assertEquals(configChanger.state(), RaftConfigChanger.State.Waiting); + Assert.assertEquals(configChanger.state(), RaftConfigChanger.State.FallbackConfigCommitting); assertTrue(configChanger.remotePeers().containsAll(Arrays.asList("V1", "V2", "V3", "L1"))); assertTrue(onDone.isDone() && onDone.isCompletedExceptionally()); + + verify(stateStorage).append(argThat(logEntries -> { + if (logEntries.size() != 1) { + return false; + } + LogEntry logEntry = logEntries.get(0); + if (!logEntry.hasConfig()) { + return false; + } + ClusterConfig targetConfig = logEntry.getConfig(); + return targetConfig.equals(latestConfig.toBuilder().setCorrelateId("newId").build()); + }), eq(true)); } @Test diff --git a/base-kv/base-kv-raft/src/test/java/org/apache/bifromq/basekv/raft/functest/ChangeClusterConfigTest.java b/base-kv/base-kv-raft/src/test/java/org/apache/bifromq/basekv/raft/functest/ChangeClusterConfigTest.java index 7ac3efeb3..a874fe58f 100644 --- a/base-kv/base-kv-raft/src/test/java/org/apache/bifromq/basekv/raft/functest/ChangeClusterConfigTest.java +++ b/base-kv/base-kv-raft/src/test/java/org/apache/bifromq/basekv/raft/functest/ChangeClusterConfigTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.raft.functest; @@ -26,14 +26,6 @@ import static org.testng.Assert.assertSame; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.raft.exception.ClusterConfigChangeException; -import org.apache.bifromq.basekv.raft.functest.annotation.Cluster; -import org.apache.bifromq.basekv.raft.functest.annotation.Config; -import org.apache.bifromq.basekv.raft.functest.annotation.Ticker; -import org.apache.bifromq.basekv.raft.functest.template.SharedRaftConfigTestTemplate; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; -import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; import com.google.protobuf.ByteString; import java.util.Arrays; import java.util.Collections; @@ -43,6 +35,14 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.raft.exception.ClusterConfigChangeException; +import org.apache.bifromq.basekv.raft.functest.annotation.Cluster; +import org.apache.bifromq.basekv.raft.functest.annotation.Config; +import org.apache.bifromq.basekv.raft.functest.annotation.Ticker; +import org.apache.bifromq.basekv.raft.functest.template.SharedRaftConfigTestTemplate; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; +import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; import org.testng.annotations.Test; @Slf4j @@ -588,4 +588,143 @@ public void testAddVotersInitWithEmptyConfig() { assertTrue(group.awaitIndexCommitted("V2", 3)); assertEquals(group.latestClusterConfig("V2").getVotersCount(), 2); } + + @Test(groups = "integration") + public void testNoOpConfigChangeDirectCommit() { + String leader = group.currentLeader().get(); + ClusterConfig before = group.latestClusterConfig(leader); + + Set newVoters = new HashSet<>(before.getVotersList()); + Set newLearners = 
new HashSet<>(before.getLearnersList()); + + CompletableFuture done = group.changeClusterConfig(leader, "cNoop", newVoters, newLearners); + done.join(); + assertTrue(done.isDone() && !done.isCompletedExceptionally()); + + ClusterConfig after = group.latestClusterConfig(leader); + assertEquals(new HashSet<>(after.getVotersList()), newVoters); + assertEquals(new HashSet<>(after.getLearnersList()), newLearners); + assertTrue(after.getNextVotersList().isEmpty()); + assertTrue(after.getNextLearnersList().isEmpty()); + assertEquals(after.getCorrelateId(), "cNoop"); + } + + @Test(groups = "integration") + public void testLearnerDoesNotBlockChange() { + String leader = group.currentLeader().get(); + + // add a new learner and isolate it + group.addRaftNode("Lx", 0, 0, ClusterConfig.newBuilder().addLearners("Lx").build(), raftConfig()); + group.connect("Lx"); + group.isolate("Lx"); + + Set newVoters = new HashSet<>(clusterConfig().getVotersList()); + Set newLearners = new HashSet<>() {{ + add("Lx"); + }}; + + CompletableFuture done = group.changeClusterConfig(leader, newVoters, newLearners); + done.join(); + + assertTrue(done.isDone() && !done.isCompletedExceptionally()); + // learners may lag, but voters should have committed the change + for (String v : newVoters) { + assertTrue(group.latestClusterConfig(v).getLearnersList().contains("Lx")); + } + } + + @Test(groups = "integration") + public void testConcurrentChangeRejected() { + String leader = group.currentLeader().get(); + + // prepare a new voter + group.addRaftNode("V4", 0, 0, ClusterConfig.newBuilder().addVoters("V4").build(), raftConfig()); + group.connect("V4"); + + Set newVoters = new HashSet<>(clusterConfig().getVotersList()) {{ + add("V4"); + }}; + + CompletableFuture first = group.changeClusterConfig(leader, "c1", newVoters, Collections.emptySet()); + CompletableFuture second = group.changeClusterConfig(leader, "c2", newVoters, Collections.emptySet()); + + boolean firstFailed; + try { + first.join(); + 
firstFailed = false; + } catch (Throwable e) { + assertSame(e.getCause().getClass(), ClusterConfigChangeException.ConcurrentChangeException.class); + firstFailed = true; + } + try { + second.join(); + if (firstFailed) { + // if first failed, second should succeed + assertTrue(second.isDone() && !second.isCompletedExceptionally()); + } + } catch (Throwable e) { + // if first succeeded, second must be rejected as concurrent change + assertSame(e.getCause().getClass(), ClusterConfigChangeException.ConcurrentChangeException.class); + } + } + + @Test(groups = "integration") + public void testParamValidation() { + String leader = group.currentLeader().get(); + + // empty voters + try { + group.changeClusterConfig(leader, "cEmpty", Collections.emptySet(), Collections.emptySet()).join(); + } catch (Throwable e) { + assertSame(e.getCause().getClass(), ClusterConfigChangeException.EmptyVotersException.class); + } + + // overlap between voters and learners + Set voters = new HashSet<>(Collections.singleton("V1")); + Set learners = new HashSet<>(Collections.singleton("V1")); + try { + group.changeClusterConfig(leader, "cOverlap", voters, learners).join(); + } catch (Throwable e) { + assertSame(e.getCause().getClass(), ClusterConfigChangeException.LearnersOverlapException.class); + } + } + + @Config(electionTimeoutTick = 2, installSnapshotTimeoutTick = 2) + @Test(groups = "integration") + public void testCatchingUpTimeoutFallback() { + String leader = group.currentLeader().get(); + ClusterConfig before = group.latestClusterConfig(leader); + + // prepare a new voter that cannot catch up + group.addRaftNode("Vx", 0, 0, ClusterConfig.newBuilder().addVoters("Vx").build(), raftConfig()); + group.connect("Vx"); + group.isolate("Vx"); + + // Choose a next voters set of size 2 to make majority catch-up require both peers; + // isolate Vx so peersCatchUp() stays false until timeout, triggering fallback. 
+ Set newVoters = new HashSet<>() {{ + add(leader); + add("Vx"); + }}; + CompletableFuture done = group.changeClusterConfig(leader, "cSlow", newVoters, Collections.emptySet()); + + try { + done.join(); + } catch (Throwable e) { + assertTrue(e.getCause() instanceof ClusterConfigChangeException.SlowLearnerException); + } + + ClusterConfig after = group.latestClusterConfig(leader); + // should rollback to previous voters/learners, with correlateId set to the submitted one + assertEquals(new HashSet<>(after.getVotersList()), new HashSet<>(before.getVotersList())); + assertEquals(new HashSet<>(after.getLearnersList()), new HashSet<>(before.getLearnersList())); + assertTrue(after.getNextVotersList().isEmpty()); + assertTrue(after.getNextLearnersList().isEmpty()); + assertEquals(after.getCorrelateId(), "cSlow"); + + // newly added voter is no longer tracked + List logs = group.syncStateLogs("Vx"); + assertFalse(logs.isEmpty()); + assertSame(logs.get(logs.size() - 1), null); + } } diff --git a/base-kv/base-kv-raft/src/test/java/org/apache/bifromq/basekv/raft/functest/RaftNodeGroup.java b/base-kv/base-kv-raft/src/test/java/org/apache/bifromq/basekv/raft/functest/RaftNodeGroup.java index 728de1e13..069d789fc 100644 --- a/base-kv/base-kv-raft/src/test/java/org/apache/bifromq/basekv/raft/functest/RaftNodeGroup.java +++ b/base-kv/base-kv-raft/src/test/java/org/apache/bifromq/basekv/raft/functest/RaftNodeGroup.java @@ -19,27 +19,11 @@ package org.apache.bifromq.basekv.raft.functest; -import org.apache.bifromq.basekv.raft.IRaftNode; -import org.apache.bifromq.basekv.raft.IRaftStateStore; -import org.apache.bifromq.basekv.raft.InMemoryStateStore; -import org.apache.bifromq.basekv.raft.RaftConfig; -import org.apache.bifromq.basekv.raft.RaftNode; -import org.apache.bifromq.basekv.raft.event.CommitEvent; -import org.apache.bifromq.basekv.raft.event.ElectionEvent; -import org.apache.bifromq.basekv.raft.event.SnapshotRestoredEvent; -import 
org.apache.bifromq.basekv.raft.event.SyncStateChangedEvent; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.raft.proto.LogEntry; -import org.apache.bifromq.basekv.raft.proto.RaftMessage; -import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; -import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; -import org.apache.bifromq.basekv.raft.proto.Snapshot; import com.google.protobuf.ByteString; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; @@ -54,6 +38,22 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.raft.ILogEntryIterator; +import org.apache.bifromq.basekv.raft.IRaftNode; +import org.apache.bifromq.basekv.raft.IRaftStateStore; +import org.apache.bifromq.basekv.raft.InMemoryStateStore; +import org.apache.bifromq.basekv.raft.RaftConfig; +import org.apache.bifromq.basekv.raft.RaftNode; +import org.apache.bifromq.basekv.raft.event.CommitEvent; +import org.apache.bifromq.basekv.raft.event.ElectionEvent; +import org.apache.bifromq.basekv.raft.event.SnapshotRestoredEvent; +import org.apache.bifromq.basekv.raft.event.SyncStateChangedEvent; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.raft.proto.LogEntry; +import org.apache.bifromq.basekv.raft.proto.RaftMessage; +import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; +import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; +import org.apache.bifromq.basekv.raft.proto.Snapshot; import org.awaitility.Awaitility; @Slf4j @@ -211,9 +211,11 @@ public List logEntries(String id, long fromIndex) { assert commitLogs.containsKey(id); RaftNode node = nodes.get(id); try { - List ret = new ArrayList(); - node.retrieveCommitted(fromIndex, 
Long.MAX_VALUE).get().forEachRemaining(ret::add); - return ret; + List ret = new ArrayList<>(); + try (ILogEntryIterator it = node.retrieveCommitted(fromIndex, Long.MAX_VALUE).get()) { + it.forEachRemaining(ret::add); + return ret; + } } catch (Exception e) { throw new RuntimeException(e); } @@ -403,17 +405,20 @@ public CompletableFuture changeClusterConfig(String id, String correlateId public List retrieveCommitted(String id, long fromIndex, long maxSize) { assert nodes.containsKey(id); List entries = new ArrayList<>(); - nodes.get(id).retrieveCommitted(fromIndex, maxSize).join().forEachRemaining(entries::add); + try (ILogEntryIterator it = nodes.get(id).retrieveCommitted(fromIndex, maxSize).join()) { + it.forEachRemaining(entries::add); + } return entries; } public Optional entryAt(String id, long index) { assert nodes.containsKey(id); - Iterator entries = nodes.get(id).retrieveCommitted(index, Long.MAX_VALUE).join(); - if (entries.hasNext()) { - return Optional.of(entries.next()); + try (ILogEntryIterator it = nodes.get(id).retrieveCommitted(index, Long.MAX_VALUE).join()) { + if (it.hasNext()) { + return Optional.of(it.next()); + } + return Optional.empty(); } - return Optional.empty(); } public long commitIndex(String id) { diff --git a/base-kv/base-kv-split-hinter-spi/pom.xml b/base-kv/base-kv-split-hinter-spi/pom.xml new file mode 100644 index 000000000..cb0127bed --- /dev/null +++ b/base-kv/base-kv-split-hinter-spi/pom.xml @@ -0,0 +1,58 @@ + + + + + 4.0.0 + + org.apache.bifromq + base-kv + ${revision} + + + base-kv-split-hinter-spi + jar + + + + org.apache.bifromq + base-kv-store-coproc-api + + + org.apache.bifromq + base-kv-type-proto + + + org.apache.bifromq + base-kv-store-rpc-definition + + + com.google.protobuf + protobuf-java + + + org.projectlombok + lombok + provided + + + + diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVLoadRecord.java 
b/base-kv/base-kv-split-hinter-spi/src/main/java/org/apache/bifromq/basekv/store/range/hinter/IKVLoadRecord.java similarity index 73% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVLoadRecord.java rename to base-kv/base-kv-split-hinter-spi/src/main/java/org/apache/bifromq/basekv/store/range/hinter/IKVLoadRecord.java index f0b72cc64..12b446970 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVLoadRecord.java +++ b/base-kv/base-kv-split-hinter-spi/src/main/java/org/apache/bifromq/basekv/store/range/hinter/IKVLoadRecord.java @@ -17,27 +17,40 @@ * under the License. */ -package org.apache.bifromq.basekv.store.api; +package org.apache.bifromq.basekv.store.range.hinter; import com.google.protobuf.ByteString; import java.util.Map; +/** + * The load recorded collected during a time window. + */ public interface IKVLoadRecord { + /** + * The start nanos of the record. + * + * @return the start time in nanos + */ long startNanos(); /** - * Get the kv io times + * Get the kv io times. * * @return the access times to kv engine */ int getKVIOs(); /** - * Get the total time spent on io of kv engine + * Get the total time spent on io of kv engine. * * @return the total time in nanos */ long getKVIONanos(); + /** + * Key distribution in the window. + * + * @return the map of key and access times. 
+ */ Map keyDistribution(); -} +} \ No newline at end of file diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeSplitHinter.java b/base-kv/base-kv-split-hinter-spi/src/main/java/org/apache/bifromq/basekv/store/range/hinter/IKVRangeSplitHinter.java similarity index 91% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeSplitHinter.java rename to base-kv/base-kv-split-hinter-spi/src/main/java/org/apache/bifromq/basekv/store/range/hinter/IKVRangeSplitHinter.java index 0a76aae7a..75486b6b0 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeSplitHinter.java +++ b/base-kv/base-kv-split-hinter-spi/src/main/java/org/apache/bifromq/basekv/store/range/hinter/IKVRangeSplitHinter.java @@ -14,16 +14,19 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ -package org.apache.bifromq.basekv.store.api; +package org.apache.bifromq.basekv.store.range.hinter; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.SplitHint; import org.apache.bifromq.basekv.store.proto.ROCoProcInput; import org.apache.bifromq.basekv.store.proto.RWCoProcInput; +/** + * The interface of Split hinter. 
+ */ public interface IKVRangeSplitHinter { void recordQuery(ROCoProcInput input, IKVLoadRecord ioRecord); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeMetadataWriter.java b/base-kv/base-kv-split-hinter-spi/src/main/java/org/apache/bifromq/basekv/store/range/hinter/IKVRangeSplitHinterFactory.java similarity index 68% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeMetadataWriter.java rename to base-kv/base-kv-split-hinter-spi/src/main/java/org/apache/bifromq/basekv/store/range/hinter/IKVRangeSplitHinterFactory.java index dda77326b..f07aeb9a5 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeMetadataWriter.java +++ b/base-kv/base-kv-split-hinter-spi/src/main/java/org/apache/bifromq/basekv/store/range/hinter/IKVRangeSplitHinterFactory.java @@ -17,12 +17,17 @@ * under the License. */ -package org.apache.bifromq.basekv.store.range; +package org.apache.bifromq.basekv.store.range.hinter; -public interface IKVRangeMetadataWriter> extends IKVRangeMetadataUpdatable { - void done(); +import com.google.protobuf.Struct; - void abort(); - - int count(); +/** + * Factory SPI for creating {@link IKVRangeSplitHinter} by configuration. + */ +public interface IKVRangeSplitHinterFactory { + /** + * Create a hinter instance with given context and plugin configuration. 
+ */ + IKVRangeSplitHinter create(SplitHinterContext ctx, Struct conf); } + diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeCheckpointReader.java b/base-kv/base-kv-split-hinter-spi/src/main/java/org/apache/bifromq/basekv/store/range/hinter/SplitHinterContext.java similarity index 70% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeCheckpointReader.java rename to base-kv/base-kv-split-hinter-spi/src/main/java/org/apache/bifromq/basekv/store/range/hinter/SplitHinterContext.java index a611fad77..a40cea106 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeCheckpointReader.java +++ b/base-kv/base-kv-split-hinter-spi/src/main/java/org/apache/bifromq/basekv/store/range/hinter/SplitHinterContext.java @@ -17,11 +17,21 @@ * under the License. */ -package org.apache.bifromq.basekv.store.range; +package org.apache.bifromq.basekv.store.range.hinter; +import java.util.function.Supplier; +import lombok.Builder; +import lombok.Value; +import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.store.api.IKVRangeReader; -public interface IKVRangeCheckpointReader extends IKVRangeReader { - @Override - IKVCheckpointReader newDataReader(); +@Value +@Builder +public class SplitHinterContext { + String clusterId; + String storeId; + KVRangeId id; + Supplier readerProvider; + String[] tags; } + diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java index 5f73f98e5..c4b64b54d 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java +++ 
b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java @@ -47,6 +47,7 @@ import org.apache.bifromq.basekv.balance.command.BootstrapCommand; import org.apache.bifromq.basekv.balance.command.ChangeConfigCommand; import org.apache.bifromq.basekv.balance.command.MergeCommand; +import org.apache.bifromq.basekv.balance.command.QuitCommand; import org.apache.bifromq.basekv.balance.command.RangeCommand; import org.apache.bifromq.basekv.balance.command.RecoveryCommand; import org.apache.bifromq.basekv.balance.command.SplitCommand; @@ -73,6 +74,8 @@ import org.apache.bifromq.basekv.store.proto.ReplyCode; import org.apache.bifromq.basekv.store.proto.TransferLeadershipReply; import org.apache.bifromq.basekv.store.proto.TransferLeadershipRequest; +import org.apache.bifromq.basekv.store.proto.ZombieQuitRequest; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.apache.bifromq.logger.MDCLogger; import org.slf4j.Logger; @@ -416,6 +419,7 @@ yield handleStoreReplyCode(command, .setVer(mergeCommand.getExpectedVer()) .setMergerId(mergeCommand.getKvRangeId()) .setMergeeId(mergeCommand.getMergeeId()) + .addAllMergeeVoters(mergeCommand.getVoters()) .build(); yield handleStoreReplyCode(command, storeClient.mergeRanges(command.getToStore(), rangeMergeRequest) @@ -462,6 +466,23 @@ yield handleStoreReplyCode(command, return true; }); } + case QUIT -> { + assert command instanceof QuitCommand; + QuitCommand quitCommand = (QuitCommand) command; + ZombieQuitRequest zombieQuitRequest = ZombieQuitRequest.newBuilder() + .setReqId(System.nanoTime()) + .setKvRangeId(quitCommand.getKvRangeId()) + .build(); + yield storeClient.zombieQuit(command.getToStore(), zombieQuitRequest) + .handle((r, e) -> { + if (e != null) { + log.error("Unexpected error when recover, req: {}", zombieQuitRequest, e); + } + log.debug("Range[{}] in zombie state and quit?: {}", + KVRangeIdUtil.toString(quitCommand.getKvRangeId()), r.getQuit()); + 
return true; + }); + } case BOOTSTRAP -> { assert command instanceof BootstrapCommand; BootstrapCommand bootstrapCommand = (BootstrapCommand) command; diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java index f6e44e363..9bc863fba 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java @@ -43,13 +43,16 @@ import org.apache.bifromq.basekv.balance.NoNeedBalance; import org.apache.bifromq.basekv.balance.StoreBalancer; import org.apache.bifromq.basekv.balance.command.BalanceCommand; +import org.apache.bifromq.basekv.balance.command.QuitCommand; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; +import org.apache.bifromq.basekv.utils.BoundaryUtil; import org.apache.bifromq.basekv.utils.EffectiveEpoch; +import org.apache.bifromq.basekv.utils.EffectiveRoute; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.apache.bifromq.basekv.utils.RangeLeader; @@ -102,6 +105,10 @@ public void update(Set landscape) { return; } scheduled = cleanupBoundaryConflictRange(effectiveEpoch); + if (scheduled) { + return; + } + scheduled = cleanupZombieRange(effectiveEpoch); if (!scheduled) { if (pendingQuitCommand.get() != null) { log.debug("No redundant range found, clear pending quit command"); @@ -196,6 +203,49 @@ private boolean 
cleanupBoundaryConflictRange(EffectiveEpoch effectiveEpoch) { return false; } + private boolean cleanupZombieRange(EffectiveEpoch effectiveEpoch) { + EffectiveRoute effectiveRoute = getEffectiveRoute(effectiveEpoch); + if (BoundaryUtil.isValidSplitSet(effectiveRoute.leaderRanges().navigableKeySet())) { + Map effectiveRangeMap = new HashMap<>(); + for (RangeLeader rangeLeader : effectiveRoute.leaderRanges().values()) { + effectiveRangeMap.put(rangeLeader.descriptor().getId(), rangeLeader.descriptor()); + } + for (KVRangeStoreDescriptor storeDescriptor : effectiveEpoch.storeDescriptors()) { + if (!storeDescriptor.getId().equals(localStoreId)) { + // only focus on the zombie ranges in local store + continue; + } + for (KVRangeDescriptor rangeDescriptor : storeDescriptor.getRangesList()) { + if (isZombieRange(rangeDescriptor, effectiveRangeMap)) { + log.debug("Schedule command to remove zombie range: id={}, boundary={}", + KVRangeIdUtil.toString(rangeDescriptor.getId()), rangeDescriptor.getBoundary()); + pendingQuitCommand.set(new PendingQuitCommand(QuitCommand.builder() + .kvRangeId(rangeDescriptor.getId()) + .toStore(localStoreId) + .build(), randomSuspicionTimeout())); + return true; + } + } + } + } + return false; + } + + private boolean isZombieRange(KVRangeDescriptor rangeDescriptor, Map effectiveRange) { + if (rangeDescriptor.getRole() != RaftNodeStatus.Candidate) { + return false; + } + if (!effectiveRange.containsKey(rangeDescriptor.getId())) { + return true; + } + KVRangeDescriptor effectiveRangeDescriptor = effectiveRange.get(rangeDescriptor.getId()); + Set allReplicas = Sets.newHashSet(effectiveRangeDescriptor.getConfig().getVotersList()); + allReplicas.addAll(effectiveRangeDescriptor.getConfig().getLearnersList()); + allReplicas.addAll(effectiveRangeDescriptor.getConfig().getNextVotersList()); + allReplicas.addAll(effectiveRangeDescriptor.getConfig().getNextLearnersList()); + return !allReplicas.contains(localStoreId); + } + private Map> 
findConflictingRanges( Set effectiveEpoch) { Map> leaderRangesByRangeId = new HashMap<>(); diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java index dc44b4be4..03648fddb 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java @@ -189,16 +189,19 @@ public void testChangeConfig() { public void testMerge() { KVRangeId id = KVRangeIdUtil.generate(); Set storeDescriptors = generateDescriptor(); - MergeCommand command = - MergeCommand.builder().kvRangeId(id).mergeeId(KVRangeIdUtil.generate()).toStore(LOCAL_STORE_ID) - .expectedVer(2L).build(); + MergeCommand command = MergeCommand.builder() + .kvRangeId(id) + .mergeeId(KVRangeIdUtil.generate()) + .toStore(LOCAL_STORE_ID) + .voters(Set.of(LOCAL_STORE_ID)) + .expectedVer(2L).build(); when(storeBalancer.balance()).thenReturn(BalanceNow.of(command)); when(storeClient.mergeRanges(eq(LOCAL_STORE_ID), any())).thenReturn(new CompletableFuture<>()); storeDescSubject.onNext(storeDescriptors); awaitExecute(200); verify(storeClient, times(1)).mergeRanges(eq(LOCAL_STORE_ID), argThat( - r -> r.getMergerId().equals(id) && r.getVer() == command.getExpectedVer() && - r.getMergeeId().equals(command.getMergeeId()))); + r -> r.getMergerId().equals(id) && r.getVer() == command.getExpectedVer() + && r.getMergeeId().equals(command.getMergeeId()))); } @Test diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java 
index 00871e263..8a8d674c6 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java @@ -21,6 +21,7 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertSame; +import static org.testng.Assert.assertTrue; import com.google.protobuf.ByteString; import java.time.Duration; @@ -32,6 +33,7 @@ import org.apache.bifromq.basekv.balance.BalanceResult; import org.apache.bifromq.basekv.balance.BalanceResultType; import org.apache.bifromq.basekv.balance.command.ChangeConfigCommand; +import org.apache.bifromq.basekv.balance.command.QuitCommand; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; @@ -414,4 +416,234 @@ public void idConflictButVotersOverlapShouldNotDelete() { BalanceResult result = balancer.balance(); assertSame(result.type(), BalanceResultType.NoNeedBalance); } + + @Test + public void scheduleQuitForZombieCandidateNotInEffectiveMap() { + // Effective leaders forming a valid split set + KVRangeId effId1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId effId2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + // Make effective leader boundaries a valid split set: [null, m) and [m, null) + Boundary b1 = Boundary.newBuilder() + .setEndKey(ByteString.copyFromUtf8("m")).build(); + Boundary b2 = Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("m")).build(); + String leaderStore = "leaderStore"; + KVRangeDescriptor effRange1 = KVRangeDescriptor.newBuilder() + .setId(effId1).setVer(1).setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(b1) + .setConfig(ClusterConfig.newBuilder().addVoters(leaderStore).build()) + .build(); + KVRangeDescriptor 
effRange2 = KVRangeDescriptor.newBuilder() + .setId(effId2).setVer(1).setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(b2) + .setConfig(ClusterConfig.newBuilder().addVoters(leaderStore).build()) + .build(); + KVRangeStoreDescriptor leaderDesc = KVRangeStoreDescriptor.newBuilder() + .setId(leaderStore) + .addRanges(effRange1) + .addRanges(effRange2) + .build(); + + // Local zombie candidate with a different id that doesn't exist in effective map + KVRangeId zombieId = KVRangeId.newBuilder().setEpoch(1).setId(99).build(); + KVRangeDescriptor zombieCandidate = KVRangeDescriptor.newBuilder() + .setId(zombieId).setVer(1).setRole(RaftNodeStatus.Candidate) + .setState(State.StateType.Normal) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("a")) + .setEndKey(ByteString.copyFromUtf8("z")).build()) + .setConfig(ClusterConfig.newBuilder().addVoters("other").build()) + .build(); + KVRangeStoreDescriptor localDesc = KVRangeStoreDescriptor.newBuilder() + .setId(localStoreId) + .addRanges(zombieCandidate) + .build(); + + balancer.update(Set.of(leaderDesc, localDesc)); + + BalanceResult result = balancer.balance(); + // first AwaitBalance due to suspicion delay + assertEquals(result.type(), BalanceResultType.AwaitBalance); + mockTime.set(3000L); + result = balancer.balance(); + assertEquals(result.type(), BalanceResultType.BalanceNow); + QuitCommand cmd = (QuitCommand) ((BalanceNow) result).command; + assertEquals(cmd.getToStore(), localStoreId); + assertEquals(cmd.getKvRangeId(), zombieId); + } + + @Test + public void doNotQuitCandidateIfLocalIsInEffectiveReplicaSet() { + // Effective leader contains localStore in replica sets + KVRangeId id = KVRangeId.newBuilder().setEpoch(1).setId(7).build(); + // Single leader must be FULL_BOUNDARY to be valid split set + Boundary b = Boundary.newBuilder().build(); + String leaderStore = "leaderStore"; + ClusterConfig effCfg = ClusterConfig.newBuilder() + .addVoters(leaderStore) + 
.addLearners(localStoreId) // local exists in effective replica set + .build(); + KVRangeDescriptor effLeader = KVRangeDescriptor.newBuilder() + .setId(id).setVer(1).setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(b) + .setConfig(effCfg) + .build(); + KVRangeStoreDescriptor leaderDesc = KVRangeStoreDescriptor.newBuilder() + .setId(leaderStore) + .addRanges(effLeader) + .build(); + + // Local candidate with the same id + KVRangeDescriptor localCandidate = KVRangeDescriptor.newBuilder() + .setId(id).setVer(1).setRole(RaftNodeStatus.Candidate) + .setState(State.StateType.Normal) + .setBoundary(b) + .setConfig(ClusterConfig.newBuilder().addVoters(leaderStore).build()) + .build(); + KVRangeStoreDescriptor localDesc = KVRangeStoreDescriptor.newBuilder() + .setId(localStoreId) + .addRanges(localCandidate) + .build(); + + balancer.update(Set.of(leaderDesc, localDesc)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void quitCandidateIfLocalNotInEffectiveReplicaSet() { + // Effective leader without local in any replica set + KVRangeId id = KVRangeId.newBuilder().setEpoch(1).setId(8).build(); + // Single leader must be FULL_BOUNDARY to be valid split set + Boundary b = Boundary.newBuilder().build(); + String leaderStore = "leaderStore"; + ClusterConfig effCfg = ClusterConfig.newBuilder() + .addVoters(leaderStore) + .build(); + KVRangeDescriptor effLeader = KVRangeDescriptor.newBuilder() + .setId(id).setVer(1).setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(b) + .setConfig(effCfg) + .build(); + KVRangeStoreDescriptor leaderDesc = KVRangeStoreDescriptor.newBuilder() + .setId(leaderStore) + .addRanges(effLeader) + .build(); + + // Local candidate with the same id but local not in effective config + KVRangeDescriptor localCandidate = KVRangeDescriptor.newBuilder() + .setId(id).setVer(1).setRole(RaftNodeStatus.Candidate) + 
.setState(State.StateType.Normal) + .setBoundary(b) + .setConfig(ClusterConfig.newBuilder().addVoters(leaderStore).build()) + .build(); + KVRangeStoreDescriptor localDesc = KVRangeStoreDescriptor.newBuilder() + .setId(localStoreId) + .addRanges(localCandidate) + .build(); + + balancer.update(Set.of(leaderDesc, localDesc)); + + BalanceResult result = balancer.balance(); + // first AwaitBalance due to suspicion delay + assertEquals(result.type(), BalanceResultType.AwaitBalance); + mockTime.set(3000L); + result = balancer.balance(); + assertEquals(result.type(), BalanceResultType.BalanceNow); + assertTrue(((BalanceNow) result).command instanceof QuitCommand); + } + + @Test + public void doNotQuitWhenEffectiveSplitSetInvalid() { + // Two leaders with overlapping boundaries -> invalid split set + String leaderStore = "leaderStore"; + KVRangeId effId1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId effId2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + Boundary b1 = Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("a")) + .setEndKey(ByteString.copyFromUtf8("n")).build(); + Boundary b2 = Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("m")) // overlaps with b1 + .setEndKey(ByteString.copyFromUtf8("z")).build(); + KVRangeDescriptor effRange1 = KVRangeDescriptor.newBuilder() + .setId(effId1).setVer(1).setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(b1) + .setConfig(ClusterConfig.newBuilder().addVoters(leaderStore).build()) + .build(); + KVRangeDescriptor effRange2 = KVRangeDescriptor.newBuilder() + .setId(effId2).setVer(1).setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(b2) + .setConfig(ClusterConfig.newBuilder().addVoters(leaderStore).build()) + .build(); + KVRangeStoreDescriptor leaderDesc = KVRangeStoreDescriptor.newBuilder() + .setId(leaderStore) + .addRanges(effRange1) + .addRanges(effRange2) + .build(); + + // Local zombie candidate not present in 
effective map + KVRangeId zombieId = KVRangeId.newBuilder().setEpoch(1).setId(99).build(); + KVRangeDescriptor zombieCandidate = KVRangeDescriptor.newBuilder() + .setId(zombieId).setVer(1).setRole(RaftNodeStatus.Candidate) + .setState(State.StateType.Normal) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("a")) + .setEndKey(ByteString.copyFromUtf8("z")).build()) + .setConfig(ClusterConfig.newBuilder().addVoters("other").build()) + .build(); + KVRangeStoreDescriptor localDesc = KVRangeStoreDescriptor.newBuilder() + .setId(localStoreId) + .addRanges(zombieCandidate) + .build(); + + balancer.update(Set.of(leaderDesc, localDesc)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void doNotQuitNonCandidate() { + // Effective leader forming valid split set + String leaderStore = "leaderStore"; + KVRangeId effId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + Boundary b = Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("a")) + .setEndKey(ByteString.copyFromUtf8("z")).build(); + KVRangeDescriptor effLeader = KVRangeDescriptor.newBuilder() + .setId(effId).setVer(1).setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(b) + .setConfig(ClusterConfig.newBuilder().addVoters(leaderStore).build()) + .build(); + KVRangeStoreDescriptor leaderDesc = KVRangeStoreDescriptor.newBuilder() + .setId(leaderStore) + .addRanges(effLeader) + .build(); + + // Local follower with an id not in effective map shouldn't be treated as zombie + KVRangeId followerId = KVRangeId.newBuilder().setEpoch(1).setId(99).build(); + KVRangeDescriptor localFollower = KVRangeDescriptor.newBuilder() + .setId(followerId).setVer(1).setRole(RaftNodeStatus.Follower) + .setState(State.StateType.Normal) + .setBoundary(b) + .setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).build()) + .build(); + KVRangeStoreDescriptor localDesc = 
KVRangeStoreDescriptor.newBuilder() + .setId(localStoreId) + .addRanges(localFollower) + .build(); + + balancer.update(Set.of(leaderDesc, localDesc)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.NoNeedBalance); + } } diff --git a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/CommandType.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/CommandType.java index f8b2a71ca..91eaaacd1 100644 --- a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/CommandType.java +++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/CommandType.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.balance.command; @@ -25,6 +25,7 @@ public enum CommandType { CHANGE_CONFIG, RECOVERY, + QUIT, SPLIT, MERGE, TRANSFER_LEADERSHIP, diff --git a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/MergeCommand.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/MergeCommand.java index 330bdcd24..2197c26de 100644 --- a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/MergeCommand.java +++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/MergeCommand.java @@ -14,16 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.balance.command; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.utils.KVRangeIdUtil; +import java.util.Set; import lombok.Getter; import lombok.Setter; import lombok.experimental.SuperBuilder; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; @Getter @Setter @@ -31,6 +32,7 @@ public class MergeCommand extends RangeCommand { private KVRangeId mergeeId; + private Set voters; @Override public CommandType type() { @@ -39,8 +41,9 @@ public CommandType type() { @Override public String toString() { - return String.format("MergeCommand{toStore=%s, kvRangeId=%s, mergeeId=%s, expectedVer=%s}", - getToStore(), KVRangeIdUtil.toString(getKvRangeId()), KVRangeIdUtil.toString(mergeeId), printableVer()); + return String.format("MergeCommand{toStore=%s, kvRangeId=%s, expectedVer=%s, mergeeId=%s, voters=%s}", + getToStore(), KVRangeIdUtil.toString(getKvRangeId()), + printableVer(), KVRangeIdUtil.toString(mergeeId), voters); } } diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/StorageEngineConfig.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/QuitCommand.java similarity index 64% rename from build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/StorageEngineConfig.java rename to base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/QuitCommand.java index cbfb8c8bd..6aa2d7283 100644 --- a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/StorageEngineConfig.java +++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/QuitCommand.java @@ -17,19 +17,26 @@ * under the License. 
*/ -package org.apache.bifromq.starter.config.model; +package org.apache.bifromq.basekv.balance.command; -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonTypeInfo; import lombok.Getter; import lombok.Setter; +import lombok.experimental.SuperBuilder; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; @Getter @Setter -@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") -@JsonSubTypes({ - @JsonSubTypes.Type(value = InMemEngineConfig.class, name = "memory"), - @JsonSubTypes.Type(value = RocksDBEngineConfig.class, name = "rocksdb") -}) -public abstract class StorageEngineConfig { +@SuperBuilder +public class QuitCommand extends BalanceCommand { + + @Override + public CommandType type() { + return CommandType.QUIT; + } + + @Override + public String toString() { + return String.format("QuitCommand{toStore=%s, kvRangeId=%s}", + getToStore(), KVRangeIdUtil.toString(getKvRangeId())); + } } diff --git a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java index a06048d4e..f3051b1f7 100644 --- a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java +++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java @@ -148,6 +148,7 @@ public static BalanceCommand diffBy(NavigableMap expect .kvRangeId(currentRangeLeader.descriptor().getId()) .expectedVer(currentRangeLeader.descriptor().getVer()) .mergeeId(nextRangeLeader.descriptor().getId()) + .voters(setOf(nextRangeLeader.descriptor().getConfig().getVotersList())) .build(); } else { // align mergee layout with merger layout diff --git a/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/BaseKVStoreClient.java 
b/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/BaseKVStoreClient.java index 741736fea..14a954bd7 100644 --- a/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/BaseKVStoreClient.java +++ b/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/BaseKVStoreClient.java @@ -34,6 +34,7 @@ import static org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc.getRecoverMethod; import static org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc.getSplitMethod; import static org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc.getTransferLeadershipMethod; +import static org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc.getZombieQuitMethod; import static org.apache.bifromq.basekv.utils.DescriptorUtil.getEffectiveEpoch; import static org.apache.bifromq.basekv.utils.DescriptorUtil.getRangeLeaders; @@ -91,6 +92,8 @@ import org.apache.bifromq.basekv.store.proto.ReplyCode; import org.apache.bifromq.basekv.store.proto.TransferLeadershipReply; import org.apache.bifromq.basekv.store.proto.TransferLeadershipRequest; +import org.apache.bifromq.basekv.store.proto.ZombieQuitReply; +import org.apache.bifromq.basekv.store.proto.ZombieQuitRequest; import org.apache.bifromq.basekv.utils.BoundaryUtil; import org.apache.bifromq.basekv.utils.EffectiveEpoch; import org.apache.bifromq.basekv.utils.RangeLeader; @@ -116,6 +119,7 @@ final class BaseKVStoreClient implements IBaseKVStoreClient { private final IBaseKVLandscapeObserver landscapeObserver; private final MethodDescriptor bootstrapMethod; private final MethodDescriptor recoverMethod; + private final MethodDescriptor zombieQuitMethod; private final MethodDescriptor transferLeadershipMethod; private final MethodDescriptor changeReplicaConfigMethod; private final MethodDescriptor splitMethod; @@ -149,6 +153,8 @@ final class BaseKVStoreClient implements IBaseKVStoreClient { toScopedFullMethodName(clusterId, 
getBootstrapMethod().getFullMethodName())); this.recoverMethod = bluePrint.methodDesc(toScopedFullMethodName(clusterId, getRecoverMethod().getFullMethodName())); + this.zombieQuitMethod = + bluePrint.methodDesc(toScopedFullMethodName(clusterId, getZombieQuitMethod().getFullMethodName())); this.transferLeadershipMethod = bluePrint.methodDesc(toScopedFullMethodName(clusterId, getTransferLeadershipMethod().getFullMethodName())); this.changeReplicaConfigMethod = @@ -231,6 +237,16 @@ public CompletableFuture recover(String storeId, RecoverRequest re return rpcClient.invoke("", serverId, request, recoverMethod); } + @Override + public CompletableFuture zombieQuit(String storeId, ZombieQuitRequest request) { + String serverId = storeToServerMap.get(storeId); + if (serverId == null) { + return CompletableFuture.failedFuture( + new ServerNotFoundException("BaseKVStore Server not available for storeId: " + storeId)); + } + return rpcClient.invoke("", serverId, request, zombieQuitMethod); + } + @Override public CompletableFuture transferLeadership(String storeId, TransferLeadershipRequest request) { @@ -271,12 +287,12 @@ public CompletableFuture changeReplicaConfig(String st .setCode(ReplyCode.InternalError) .build(); }) - .thenApplyAsync(v -> { + .thenApply(v -> { if (v.hasLatest()) { patchRouter(storeId, v.getLatest()); } return v; - }, CLIENT_EXECUTOR); + }); } @Override @@ -400,7 +416,7 @@ public void close() { }; } return rpcClient.createRequestPipeline("", serverIdOpt.get(), null, emptyMap(), executeMethod); - }), latest -> patchRouter(storeId, latest), CLIENT_EXECUTOR, log); + }), latest -> patchRouter(storeId, latest), log); } @Override @@ -444,7 +460,7 @@ public void close() { } else { return rpcClient.createRequestPipeline("", serverIdOpt.get(), null, emptyMap(), queryMethod); } - }), latest -> patchRouter(storeId, latest), CLIENT_EXECUTOR, log); + }), latest -> patchRouter(storeId, latest), log); } @Override @@ -513,13 +529,16 @@ private boolean 
refreshStoreRoute(ClusterInfo clusterInfo) { } private void patchRouter(String storeId, KVRangeDescriptor latest) { - latestRouteMap.set(patchRouteMap(storeId, latest, new HashMap<>(latestRouteMap.get()))); - NavigableMap rangeLeaders = getRangeLeaders(latestRouteMap.get()); - NavigableMap router = buildClientRoute(clusterId, rangeLeaders, latestRouteMap.get()); - NavigableMap last = effectiveRouter.get(); - if (!router.equals(last)) { - effectiveRouter.set(Collections.unmodifiableNavigableMap(router)); - } + CLIENT_EXECUTOR.execute(() -> { + latestRouteMap.set(patchRouteMap(storeId, latest, new HashMap<>(latestRouteMap.get()))); + NavigableMap rangeLeaders = getRangeLeaders(latestRouteMap.get()); + NavigableMap router = buildClientRoute(clusterId, rangeLeaders, + latestRouteMap.get()); + NavigableMap last = effectiveRouter.get(); + if (!router.equals(last)) { + effectiveRouter.set(Collections.unmodifiableNavigableMap(router)); + } + }); } private void refreshMutPipelines(Map storeDescriptors) { diff --git a/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/IBaseKVStoreClient.java b/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/IBaseKVStoreClient.java index 86876ea8b..74fe92db7 100644 --- a/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/IBaseKVStoreClient.java +++ b/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/IBaseKVStoreClient.java @@ -14,11 +14,15 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.client; +import io.reactivex.rxjava3.core.Observable; +import java.util.NavigableMap; +import java.util.Set; +import java.util.concurrent.CompletableFuture; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; import org.apache.bifromq.basekv.store.proto.BootstrapReply; @@ -37,11 +41,9 @@ import org.apache.bifromq.basekv.store.proto.RecoverRequest; import org.apache.bifromq.basekv.store.proto.TransferLeadershipReply; import org.apache.bifromq.basekv.store.proto.TransferLeadershipRequest; +import org.apache.bifromq.basekv.store.proto.ZombieQuitReply; +import org.apache.bifromq.basekv.store.proto.ZombieQuitRequest; import org.apache.bifromq.baserpc.client.IConnectable; -import io.reactivex.rxjava3.core.Observable; -import java.util.NavigableMap; -import java.util.Set; -import java.util.concurrent.CompletableFuture; /** * The interface of BaseKV Store Client. @@ -61,6 +63,8 @@ static BaseKVStoreClientBuilder newBuilder() { CompletableFuture recover(String storeId, RecoverRequest request); + CompletableFuture zombieQuit(String storeId, ZombieQuitRequest request); + CompletableFuture transferLeadership(String storeId, TransferLeadershipRequest request); CompletableFuture changeReplicaConfig(String storeId, ChangeReplicaConfigRequest request); @@ -123,7 +127,6 @@ static BaseKVStoreClientBuilder newBuilder() { */ CompletableFuture linearizedQuery(String storeId, KVRangeRORequest request, String orderKey); - /** * Create a caller-managed pipeline for executing rw command orderly. 
* diff --git a/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/ManagedMutationPipeline.java b/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/ManagedMutationPipeline.java index 49c3c4a6a..f140235ac 100644 --- a/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/ManagedMutationPipeline.java +++ b/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/ManagedMutationPipeline.java @@ -22,7 +22,6 @@ import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.disposables.Disposable; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.Executor; import java.util.function.Consumer; import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.store.proto.KVRangeRWReply; @@ -34,16 +33,12 @@ class ManagedMutationPipeline implements IMutationPipeline { private final Logger log; private final Disposable disposable; private final Consumer routePatcher; - private final Executor clientExecutor; private volatile IRPCClient.IRequestPipeline ppln; ManagedMutationPipeline(Observable> pplnObservable, - Consumer routePatcher, - Executor clientExecutor, - Logger log) { + Consumer routePatcher, Logger log) { this.log = log; this.routePatcher = routePatcher; - this.clientExecutor = clientExecutor; disposable = pplnObservable.subscribe(next -> { IRPCClient.IRequestPipeline old = ppln; ppln = next; @@ -58,12 +53,12 @@ class ManagedMutationPipeline implements IMutationPipeline { public CompletableFuture execute(KVRangeRWRequest request) { log.trace("Requesting rw range:req={}", request); return ppln.invoke(request) - .thenApplyAsync(v -> { + .thenApply(v -> { if (v.hasLatest()) { routePatcher.accept(v.getLatest()); } return v; - }, clientExecutor); + }); } @Override diff --git a/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/ManagedQueryPipeline.java 
b/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/ManagedQueryPipeline.java index f8fd7bff4..74a0edf14 100644 --- a/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/ManagedQueryPipeline.java +++ b/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/ManagedQueryPipeline.java @@ -22,7 +22,6 @@ import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.disposables.Disposable; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.Executor; import java.util.function.Consumer; import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.store.proto.KVRangeROReply; @@ -34,16 +33,13 @@ class ManagedQueryPipeline implements IQueryPipeline { private final Logger log; private final Disposable disposable; private final Consumer routePatcher; - private final Executor clientExecutor; private volatile IRPCClient.IRequestPipeline ppln; ManagedQueryPipeline(Observable> pplnObservable, Consumer routePatcher, - Executor clientExecutor, Logger log) { this.log = log; this.routePatcher = routePatcher; - this.clientExecutor = clientExecutor; disposable = pplnObservable.subscribe(next -> { IRPCClient.IRequestPipeline old = ppln; ppln = next; @@ -57,12 +53,12 @@ class ManagedQueryPipeline implements IQueryPipeline { public CompletableFuture query(KVRangeRORequest request) { log.trace("Invoke ro range request: \n{}", request); return ppln.invoke(request) - .thenApplyAsync(v -> { + .thenApply(v -> { if (v.hasLatest()) { routePatcher.accept(v.getLatest()); } return v; - }, clientExecutor); + }); } @Override diff --git a/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/scheduler/BatchQueryCall.java b/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/scheduler/BatchQueryCall.java index 91751a964..481f78730 100644 --- 
a/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/scheduler/BatchQueryCall.java +++ b/base-kv/base-kv-store-client/src/main/java/org/apache/bifromq/basekv/client/scheduler/BatchQueryCall.java @@ -52,15 +52,13 @@ protected BatchQueryCall(IQueryPipeline pipeline, QueryCallBatcherKey batcherKey @Override public void add(ICallTask callTask) { - BatchQueryCall.BatchCallTask lastBatchCallTask; - QueryCallBatcherKey batcherKey = callTask.batcherKey(); - if ((lastBatchCallTask = batchCallTasks.peekLast()) != null) { + BatchQueryCall.BatchCallTask lastBatchCallTask = batchCallTasks.peekLast(); + if (lastBatchCallTask == null) { + lastBatchCallTask = new BatchQueryCall.BatchCallTask<>(this.batcherKey.storeId, this.batcherKey.ver); lastBatchCallTask.batchedTasks.add(callTask); batchCallTasks.add(lastBatchCallTask); } else { - lastBatchCallTask = new BatchQueryCall.BatchCallTask<>(batcherKey.storeId, batcherKey.ver); lastBatchCallTask.batchedTasks.add(callTask); - batchCallTasks.add(lastBatchCallTask); } } diff --git a/base-kv/base-kv-store-client/src/test/java/org/apache/bifromq/basekv/client/scheduler/BatchQueryCallTest.java b/base-kv/base-kv-store-client/src/test/java/org/apache/bifromq/basekv/client/scheduler/BatchQueryCallTest.java index 3d6950bc8..3b7330a64 100644 --- a/base-kv/base-kv-store-client/src/test/java/org/apache/bifromq/basekv/client/scheduler/BatchQueryCallTest.java +++ b/base-kv/base-kv-store-client/src/test/java/org/apache/bifromq/basekv/client/scheduler/BatchQueryCallTest.java @@ -19,150 +19,113 @@ package org.apache.bifromq.basekv.client.scheduler; -import static org.apache.bifromq.basekv.client.scheduler.Fixtures.setting; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; -import com.google.protobuf.ByteString; -import java.time.Duration; -import java.util.ArrayList; 
-import java.util.List; -import java.util.TreeMap; +import java.util.Queue; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.ThreadLocalRandom; -import lombok.SneakyThrows; -import org.apache.bifromq.basekv.client.IBaseKVStoreClient; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.bifromq.basekv.client.IQueryPipeline; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.store.proto.KVRangeROReply; -import org.apache.bifromq.basekv.utils.BoundaryUtil; -import org.apache.bifromq.basekv.utils.KVRangeIdUtil; -import org.mockito.Mock; -import org.mockito.MockitoAnnotations; -import org.testng.annotations.AfterMethod; -import org.testng.annotations.BeforeMethod; +import org.apache.bifromq.basekv.store.proto.KVRangeRORequest; +import org.apache.bifromq.basekv.store.proto.ROCoProcInput; +import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; +import org.apache.bifromq.basekv.store.proto.ReplyCode; +import org.apache.bifromq.basescheduler.ICallTask; import org.testng.annotations.Test; public class BatchQueryCallTest { - private KVRangeId id; - @Mock - private IBaseKVStoreClient storeClient; - @Mock - private IQueryPipeline queryPipeline1; - @Mock - private IQueryPipeline queryPipeline2; - private AutoCloseable closeable; - - @BeforeMethod - public void setup() { - closeable = MockitoAnnotations.openMocks(this); - id = KVRangeIdUtil.generate(); - } + @Test + public void onlyOneQueryPerBatch() { + CountingQueryPipeline pipeline = new CountingQueryPipeline(); + QueryCallBatcherKey key = new QueryCallBatcherKey(KVRangeId.newBuilder().setId(1).build(), + "storeA", 0, 1L, false); + DummyBatchQueryCall call = new DummyBatchQueryCall(pipeline, key); + + // add multiple tasks into the same batch + DummyTask t1 = new DummyTask(key); + DummyTask t2 = new 
DummyTask(key); + call.add(t1); + call.add(t2); + + // execute and wait + call.execute().join(); - @SneakyThrows - @AfterMethod - public void teardown() { - closeable.close(); + // only 1 underlying query should be fired + assertEquals(pipeline.count.get(), 1); + // ensure tasks completed + t1.resultPromise().join(); + t2.resultPromise().join(); } - @Test - public void addToSameBatch() { - ExecutorService executor = Executors.newSingleThreadScheduledExecutor(); - - when(storeClient.latestEffectiveRouter()).thenReturn(new TreeMap<>(BoundaryUtil::compare) {{ - put(FULL_BOUNDARY, setting(id, "V1", 0)); - }}); - when(storeClient.createLinearizedQueryPipeline("V1")).thenReturn(queryPipeline1); - when(queryPipeline1.query(any())) - .thenReturn(CompletableFuture.supplyAsync(() -> KVRangeROReply.newBuilder().build(), executor)); - - TestQueryCallScheduler scheduler = new TestQueryCallScheduler(storeClient, Duration.ofMinutes(5), true); - List reqList = new ArrayList<>(); - List respList = new CopyOnWriteArrayList<>(); - List> futures = new ArrayList<>(); - for (int i = 0; i < 1000; i++) { - int req = ThreadLocalRandom.current().nextInt(); - reqList.add(req); - futures.add(scheduler.schedule(ByteString.copyFromUtf8(Integer.toString(req))) - .thenAccept((v) -> respList.add(Integer.parseInt(v.toStringUtf8())))); + private static class CountingQueryPipeline implements IQueryPipeline { + final AtomicInteger count = new AtomicInteger(); + + @Override + public CompletableFuture query(KVRangeRORequest request) { + count.incrementAndGet(); + return CompletableFuture.completedFuture(KVRangeROReply.newBuilder() + .setCode(ReplyCode.Ok) + .build()); + } + + @Override + public void close() { } - CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join(); - // the resp order preserved - assertEquals(reqList, respList); - executor.shutdown(); } - @Test - public void addToDifferentBatch() { - when(storeClient.createLinearizedQueryPipeline("V1")).thenReturn(queryPipeline1); 
- when(storeClient.createLinearizedQueryPipeline("V2")).thenReturn(queryPipeline2); - ExecutorService executor1 = Executors.newSingleThreadScheduledExecutor(); - ExecutorService executor2 = Executors.newSingleThreadScheduledExecutor(); - when(queryPipeline1.query(any())) - .thenReturn(CompletableFuture.supplyAsync(() -> KVRangeROReply.newBuilder().build(), executor1)); - when(queryPipeline2.query(any())) - .thenReturn(CompletableFuture.supplyAsync(() -> KVRangeROReply.newBuilder().build(), executor2)); - TestQueryCallScheduler scheduler = new TestQueryCallScheduler(storeClient, Duration.ofMinutes(5), true); - List reqList1 = new ArrayList<>(); - List reqList2 = new ArrayList<>(); - List respList1 = new CopyOnWriteArrayList<>(); - List respList2 = new CopyOnWriteArrayList<>(); - List> futures = new ArrayList<>(); - for (int i = 0; i < 1000; i++) { - int req = ThreadLocalRandom.current().nextInt(1, 1001); - if (req < 500) { - reqList1.add(req); - when(storeClient.latestEffectiveRouter()).thenReturn(new TreeMap<>(BoundaryUtil::compare) {{ - put(FULL_BOUNDARY, setting(id, "V1", 0)); - }}); - futures.add(scheduler.schedule(ByteString.copyFromUtf8(Integer.toString(req))) - .thenAccept((v) -> respList1.add(Integer.parseInt(v.toStringUtf8())))); - } else { - reqList2.add(req); - when(storeClient.latestEffectiveRouter()).thenReturn(new TreeMap<>(BoundaryUtil::compare) {{ - put(FULL_BOUNDARY, setting(id, "V2", 0)); - }}); - futures.add(scheduler.schedule(ByteString.copyFromUtf8(Integer.toString(req))) - .thenAccept((v) -> respList2.add(Integer.parseInt(v.toStringUtf8())))); + private static class DummyBatchQueryCall extends BatchQueryCall { + DummyBatchQueryCall(IQueryPipeline pipeline, QueryCallBatcherKey key) { + super(pipeline, key); + } + + @Override + protected ROCoProcInput makeBatch(java.util.Iterator reqIterator) { + return ROCoProcInput.getDefaultInstance(); + } + + @Override + protected void handleOutput(Queue> batchedTasks, + ROCoProcOutput output) { + ICallTask 
task; + while ((task = batchedTasks.poll()) != null) { + task.resultPromise().complete(1); } } - CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join(); - // the resp order preserved - assertEquals(reqList1, respList1); - assertEquals(reqList2, respList2); - executor1.shutdown(); - executor2.shutdown(); + + @Override + protected void handleException(ICallTask callTask, Throwable e) { + callTask.resultPromise().completeExceptionally(e); + } } - @Test - public void executeManySmallBatchesNoRecursion() { - when(storeClient.latestEffectiveRouter()).thenReturn(new TreeMap<>(BoundaryUtil::compare) {{ - put(FULL_BOUNDARY, setting(id, "V1", 0)); - }}); - when(storeClient.createLinearizedQueryPipeline("V1")).thenReturn(queryPipeline1); - - when(queryPipeline1.query(any())).thenAnswer(invocation -> - CompletableFuture.supplyAsync(KVRangeROReply::newBuilder) - .thenApply(KVRangeROReply.Builder::build) - ); - - TestQueryCallScheduler scheduler = new TestQueryCallScheduler(storeClient, Duration.ofSeconds(1), true); - int n = 5000; - List reqList = new ArrayList<>(n); - List respList = new CopyOnWriteArrayList<>(); - List> futures = new ArrayList<>(n); - for (int i = 0; i < n; i++) { - reqList.add(i); - futures.add(scheduler.schedule(ByteString.copyFromUtf8(Integer.toString(i))) - .thenAccept(v -> respList.add(Integer.parseInt(v.toStringUtf8())))); + private static class DummyTask implements ICallTask { + private final QueryCallBatcherKey key; + private final CompletableFuture promise = new CompletableFuture<>(); + + DummyTask(QueryCallBatcherKey key) { + this.key = key; + } + + @Override + public Integer call() { + return 1; + } + + @Override + public CompletableFuture resultPromise() { + return promise; + } + + @Override + public QueryCallBatcherKey batcherKey() { + return key; + } + + @Override + public long ts() { + return System.nanoTime(); } - CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join(); - assertEquals(respList.size(), n); - 
assertEquals(reqList, respList); } } + diff --git a/base-kv/base-kv-store-coproc-api/pom.xml b/base-kv/base-kv-store-coproc-api/pom.xml new file mode 100644 index 000000000..be3d67f1f --- /dev/null +++ b/base-kv/base-kv-store-coproc-api/pom.xml @@ -0,0 +1,49 @@ + + + + + 4.0.0 + + org.apache.bifromq + base-kv + ${revision} + + + base-kv-store-coproc-api + jar + + + + org.apache.bifromq + base-kv-type-proto + + + org.apache.bifromq + base-kv-store-rpc-definition + + + com.google.protobuf + protobuf-java + + + + diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVIterator.java b/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVIterator.java similarity index 88% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVIterator.java rename to base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVIterator.java index 0082ca263..38ae49146 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVIterator.java +++ b/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVIterator.java @@ -14,14 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.api; import com.google.protobuf.ByteString; -public interface IKVIterator { +/** + * The iterator interface for KVRange. 
+ */ +public interface IKVIterator extends AutoCloseable { ByteString key(); ByteString value(); @@ -39,4 +42,6 @@ public interface IKVIterator { void seek(ByteString key); void seekForPrev(ByteString key); + + void close(); } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeCoProc.java b/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeCoProc.java similarity index 88% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeCoProc.java rename to base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeCoProc.java index b0da72205..42ebda600 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeCoProc.java +++ b/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeCoProc.java @@ -30,28 +30,29 @@ import org.apache.bifromq.basekv.store.proto.RWCoProcOutput; /** - * The interface of range co-processor. + * The interface of KVRange co-processor. */ public interface IKVRangeCoProc { /** * Execute a query co-proc. * * @param input the query input + * @param reader the reader of latest consistent-view of KVRange * @return the future of query result */ - CompletableFuture query(ROCoProcInput input, IKVReader client); + CompletableFuture query(ROCoProcInput input, IKVRangeReader reader); /** * Execute a mutation co-proc, returns a supplier of mutation output. The supplier will be called after mutation is * persisted successfully. 
* * @param input the mutation input - * @param reader the range data reader - * @param writer the range data writer + * @param reader the reader of latest consistent-view of KVRange + * @param writer the writer of KVRange * @param isLeader indicating whether current node was the leader committing the log * @return the future of mutation result */ - Supplier mutate(RWCoProcInput input, IKVReader reader, IKVWriter writer, boolean isLeader); + Supplier mutate(RWCoProcInput input, IKVRangeReader reader, IKVWriter writer, boolean isLeader); /** * This method will be called whenever owner range is restored from a snapshot or boundary changed via split/merge. diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeCoProcFactory.java b/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeCoProcFactory.java similarity index 67% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeCoProcFactory.java rename to base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeCoProcFactory.java index eabbab2b1..fcb7eeaca 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeCoProcFactory.java +++ b/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeCoProcFactory.java @@ -14,29 +14,27 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store.api; -import org.apache.bifromq.basekv.proto.KVRangeId; -import java.util.Collections; -import java.util.List; +import com.google.protobuf.ByteString; +import java.util.Optional; import java.util.function.Supplier; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.proto.KVRangeId; /** - * The interface of range co-processor factory. + * The interface of KVRange co-processor factory. */ public interface IKVRangeCoProcFactory { - default List createHinters(String clusterId, - String storeId, - KVRangeId id, - Supplier readerProvider) { - return Collections.emptyList(); + default Optional toSplitKey(ByteString key, Boundary boundary) { + return Optional.ofNullable(key); } IKVRangeCoProc createCoProc(String clusterId, String storeId, KVRangeId id, - Supplier readerProvider); + Supplier readerProvider); } diff --git a/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeReader.java b/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeReader.java new file mode 100644 index 000000000..753e1c2fc --- /dev/null +++ b/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeReader.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.store.api; + +import com.google.protobuf.ByteString; +import java.util.Optional; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.proto.State; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; + +/** + * Interface for reading a KVRange's consistent-view. + */ +public interface IKVRangeReader extends AutoCloseable { + + /** + * Get the current version of the KVRange. + * + * @return the version of the KVRange + */ + long version(); + + /** + * Get the current state of the KVRange. + * + * @return the state of the KVRange + */ + State state(); + + /** + * Get the last applied WAL index of the KVRange. + * + * @return the last applied WAL index of the KVRange + */ + long lastAppliedIndex(); + + /** + * Get the current boundary of the KVRange. + * + * @return the boundary of the KVRange + */ + Boundary boundary(); + + /** + * Get the current cluster configuration of the KVRange. + * + * @return the cluster configuration of the KVRange + */ + ClusterConfig clusterConfig(); + + /** + * Get the size of the KVRange within the given boundary intersecting with the KVRange's current boundary. + * + * @param boundary the boundary to calculate the size. + * @return the size of the KVRange within the given boundary + */ + long size(Boundary boundary); + + /** + * Check if a key exists. + * + * @param key the key + * @return true if the key exists, false otherwise + */ + boolean exist(ByteString key); + + /** + * Get the value of a key. 
+ * + * @param key the key + * @return the value of the key, or empty if the key does not exist + */ + Optional get(ByteString key); + + /** + * Get an iterator for the KVRange sharing same consistent-view. + * + * @return the iterator. + */ + IKVIterator iterator(); + + /** + * Get an iterator for a sub-boundary of the KVRange sharing same consistent-view. + * The sub-boundary is calculated as the intersection of the given boundary and the KVRange's current boundary. + * + * @param boundary the boundary + * @return the iterator. + */ + IKVIterator iterator(Boundary boundary); + + /** + * Close the reader and release all resources. + */ + void close(); +} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeReader.java b/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeRefreshableReader.java similarity index 76% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeReader.java rename to base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeRefreshableReader.java index f01edf83e..0f39011ce 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeReader.java +++ b/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeRefreshableReader.java @@ -14,15 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.api; /** - * Thread-Unsafe reader to access the consistent view of KVRange. + * A KVRange reader that can be refreshed to the latest consistent-view. 
*/ -public interface IKVRangeReader extends IKVRangeMetadata { - - IKVReader newDataReader(); +public interface IKVRangeRefreshableReader extends IKVRangeReader { + /** + * Refresh the reader to the latest consistent-view. + */ + void refresh(); } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVWriter.java b/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVWriter.java similarity index 86% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVWriter.java rename to base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVWriter.java index bc26ca4c6..25576a627 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVWriter.java +++ b/base-kv/base-kv-store-coproc-api/src/main/java/org/apache/bifromq/basekv/store/api/IKVWriter.java @@ -14,14 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.api; -import org.apache.bifromq.basekv.proto.Boundary; import com.google.protobuf.ByteString; +import org.apache.bifromq.basekv.proto.Boundary; +/** + * The writer for KV mutation. + */ public interface IKVWriter { void delete(ByteString key); @@ -31,16 +34,16 @@ public interface IKVWriter { /** * Insert a non-exist key value pair, if the key is already exist, the result is undefined. * - * @param key - * @param value + * @param key the key to insert + * @param value the value to insert */ void insert(ByteString key, ByteString value); /** * Put a key value pair, if the key is existed, its value will be overridden. 
* - * @param key - * @param value + * @param key the key to put + * @param value the value to put */ void put(ByteString key, ByteString value); } diff --git a/base-kv/base-kv-store-rpc-definition/src/main/java/org/apache/bifromq/basekv/RPCBluePrint.java b/base-kv/base-kv-store-rpc-definition/src/main/java/org/apache/bifromq/basekv/RPCBluePrint.java index f31187930..43122b069 100644 --- a/base-kv/base-kv-store-rpc-definition/src/main/java/org/apache/bifromq/basekv/RPCBluePrint.java +++ b/base-kv/base-kv-store-rpc-definition/src/main/java/org/apache/bifromq/basekv/RPCBluePrint.java @@ -19,6 +19,7 @@ package org.apache.bifromq.basekv; +import static io.grpc.MethodDescriptor.generateFullMethodName; import static org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc.getBootstrapMethod; import static org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc.getChangeReplicaConfigMethod; import static org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc.getExecuteMethod; @@ -28,16 +29,16 @@ import static org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc.getRecoverMethod; import static org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc.getSplitMethod; import static org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc.getTransferLeadershipMethod; -import static io.grpc.MethodDescriptor.generateFullMethodName; +import static org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc.getZombieQuitMethod; -import org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc; -import org.apache.bifromq.baserpc.BluePrint; import io.grpc.MethodDescriptor; import io.grpc.ServerMethodDefinition; import io.grpc.ServerServiceDefinition; import io.grpc.ServiceDescriptor; import java.util.HashMap; import java.util.Map; +import org.apache.bifromq.basekv.store.proto.BaseKVStoreServiceGrpc; +import org.apache.bifromq.baserpc.BluePrint; public class RPCBluePrint { public static ServerServiceDefinition scope(ServerServiceDefinition 
definition, String clusterId) { @@ -74,6 +75,7 @@ public static BluePrint build(String clusterId) { .serviceDescriptor(serviceDesc) .methodSemantic(methodMap.get(getBootstrapMethod()), BluePrint.DDUnaryMethod.getInstance()) .methodSemantic(methodMap.get(getRecoverMethod()), BluePrint.DDUnaryMethod.getInstance()) + .methodSemantic(methodMap.get(getZombieQuitMethod()), BluePrint.DDUnaryMethod.getInstance()) .methodSemantic(methodMap.get(getChangeReplicaConfigMethod()), BluePrint.DDUnaryMethod.getInstance()) .methodSemantic(methodMap.get(getSplitMethod()), BluePrint.DDUnaryMethod.getInstance()) .methodSemantic(methodMap.get(getMergeMethod()), BluePrint.DDUnaryMethod.getInstance()) diff --git a/base-kv/base-kv-store-rpc-definition/src/main/proto/basekv/BaseKVStoreService.proto b/base-kv/base-kv-store-rpc-definition/src/main/proto/basekv/BaseKVStoreService.proto index a6bf304be..05ba100e2 100644 --- a/base-kv/base-kv-store-rpc-definition/src/main/proto/basekv/BaseKVStoreService.proto +++ b/base-kv/base-kv-store-rpc-definition/src/main/proto/basekv/BaseKVStoreService.proto @@ -31,6 +31,7 @@ option optimize_for = SPEED; service BaseKVStoreService{ rpc bootstrap(BootstrapRequest) returns (BootstrapReply); rpc recover(RecoverRequest) returns(RecoverReply); + rpc zombieQuit(ZombieQuitRequest) returns(ZombieQuitReply); rpc transferLeadership(TransferLeadershipRequest) returns (TransferLeadershipReply); rpc changeReplicaConfig(ChangeReplicaConfigRequest) returns (ChangeReplicaConfigReply); rpc split(KVRangeSplitRequest) returns (KVRangeSplitReply); @@ -74,6 +75,22 @@ message RecoverReply{ Result result = 2; } +message ZombieQuitRequest{ + uint64 reqId = 1; + basekv.KVRangeId kvRangeId = 2; +} + +message ZombieQuitReply{ + enum Result { + Ok = 0; + NotFound = 1; + Error = 2; + } + uint64 reqId = 1; + Result result = 2; + bool quit = 3; +} + enum ReplyCode{ Ok = 0; BadVersion = 1; @@ -127,6 +144,7 @@ message KVRangeMergeRequest{ uint64 ver = 2; basekv.KVRangeId mergerId = 3; 
basekv.KVRangeId mergeeId = 4; + repeated string mergeeVoters = 5; // the mergee voters to contact to trigger the merge workflow } message KVRangeMergeReply{ diff --git a/base-kv/base-kv-store-rpc-definition/src/main/proto/basekv/Command.proto b/base-kv/base-kv-store-rpc-definition/src/main/proto/basekv/Command.proto index f55d53621..f33979feb 100644 --- a/base-kv/base-kv-store-rpc-definition/src/main/proto/basekv/Command.proto +++ b/base-kv/base-kv-store-rpc-definition/src/main/proto/basekv/Command.proto @@ -39,6 +39,7 @@ message SplitRange { message PrepareMergeWith{ basekv.KVRangeId mergeeId = 1; + repeated string voters = 2; // mergee's voter set seen at the time when request was made } message CancelMerging{ @@ -56,6 +57,7 @@ message Merge{ uint64 mergeeVer = 2; basekv.Boundary Boundary = 3; string storeId = 4; + raft.ClusterConfig config = 5; // mergee's config at the moment } message MergeDone{ diff --git a/base-kv/base-kv-store-rpc-definition/src/main/proto/basekv/StoreMessage.proto b/base-kv/base-kv-store-rpc-definition/src/main/proto/basekv/StoreMessage.proto index b9a57a296..55174ec66 100644 --- a/base-kv/base-kv-store-rpc-definition/src/main/proto/basekv/StoreMessage.proto +++ b/base-kv/base-kv-store-rpc-definition/src/main/proto/basekv/StoreMessage.proto @@ -38,6 +38,11 @@ message SnapshotSyncRequest{ KVRangeSnapshot snapshot = 2; } +message DataMergeRequest{ + string sessionId = 1; + KVRangeId mergerId = 2; +} + message KVPair{ bytes key = 1; bytes value = 2; @@ -50,6 +55,7 @@ message SaveSnapshotDataRequest{ More = 0; End = 1; Error = 2; + NotFound = 3; // no range found } Flag flag = 3; repeated KVPair kv = 4; @@ -91,7 +97,7 @@ message PrepareMergeToRequest{ message PrepareMergeToReply{ string taskId = 1; - bool accept = 2; + optional bool accept = 2; // null means no mergee found, true means accepted, false means rejected and needs retry } message MergeRequest{ @@ -101,16 +107,16 @@ message MergeRequest{ uint64 mergeeVer = 4; basekv.Boundary 
boundary = 5; string storeId = 6; + raft.ClusterConfig config = 7; // mergee's config at the moment } message MergeReply{ string taskId = 1; - bool accept = 2; + optional bool accept = 2; // null means no merger found, true means accepted, false means rejected and needs retry } message MergeDoneRequest{ string taskId = 1; - uint64 reqId = 2; basekv.KVRangeId id = 3; // merger's id uint64 mergeeVer = 4; string storeId = 5; @@ -118,7 +124,7 @@ message MergeDoneRequest{ message MergeDoneReply{ string taskId = 1; - bool accept = 2; + optional bool accept = 2; // null means no mergee found, true means accepted, false means rejected and needs retry } message CancelMergingRequest{ @@ -130,12 +136,21 @@ message CancelMergingRequest{ message CancelMergingReply{ string taskId = 1; - bool accept = 2; + optional bool accept = 2; // null means no merger/mergee found, true means accepted, false means rejected and needs retry +} + +message MergeHelpRequest{ + string taskId = 1; + basekv.KVRangeId mergeeId = 2; + uint64 ver = 3; + basekv.Boundary boundary = 4; + raft.ClusterConfig config = 5; } +// used for both direction message KVRangeMessage{ - KVRangeId rangeId = 1; - optional string hostStoreId = 2; // null for broadcast + optional KVRangeId rangeId = 1; // send direction: the target rangeId or null for broadcast; recv direction: the source rangeId + optional string hostStoreId = 2; // send direction: the target store, or null for broadcast; recv direction: the source storeId oneof PayloadType{ WALRaftMessages walRaftMessages = 4; SnapshotSyncRequest snapshotSyncRequest = 5; @@ -151,6 +166,8 @@ message KVRangeMessage{ CancelMergingReply cancelMergingReply = 15; MergeDoneRequest mergeDoneRequest = 16; MergeDoneReply mergeDoneReply = 17; + DataMergeRequest dataMergeRequest = 18; + MergeHelpRequest mergeHelpRequest = 19; } } diff --git a/base-kv/base-kv-store-server/pom.xml b/base-kv/base-kv-store-server/pom.xml index 5514a7e45..4990c4f22 100644 --- 
a/base-kv/base-kv-store-server/pom.xml +++ b/base-kv/base-kv-store-server/pom.xml @@ -55,6 +55,14 @@ org.apache.bifromq base-crdt-service + + org.apache.bifromq + base-kv-store-coproc-api + + + org.apache.bifromq + base-kv-split-hinter-spi + org.apache.bifromq base-kv-meta-service @@ -65,7 +73,15 @@ org.apache.bifromq - base-kv-local-engine + base-kv-local-engine-spi + + + org.apache.bifromq + base-kv-local-engine-rocksdb + + + org.apache.bifromq + base-kv-local-engine-memory org.apache.bifromq @@ -129,4 +145,4 @@ test - \ No newline at end of file + diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/AgentHostStoreMessenger.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/AgentHostStoreMessenger.java index cb3a5abb9..f8dab46b0 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/AgentHostStoreMessenger.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/AgentHostStoreMessenger.java @@ -23,6 +23,9 @@ import com.google.protobuf.InvalidProtocolBufferException; import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.disposables.CompositeDisposable; +import io.reactivex.rxjava3.subjects.PublishSubject; +import io.reactivex.rxjava3.subjects.Subject; import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.bifromq.basecluster.IAgentHost; @@ -41,8 +44,10 @@ class AgentHostStoreMessenger implements IStoreMessenger { private final IAgentHost agentHost; private final IAgent agent; private final IAgentMember agentMember; + private final Subject receiveSubject = PublishSubject.create().toSerialized(); private final String clusterId; private final String storeId; + private final CompositeDisposable disposables = new CompositeDisposable(); AgentHostStoreMessenger(IAgentHost agentHost, String clusterId, String storeId) { this.agentHost = agentHost; @@ -51,6 +56,23 @@ class 
AgentHostStoreMessenger implements IStoreMessenger { this.agent = agentHost.host(toBaseKVAgentId(clusterId)); this.agentMember = agent.register(storeId); log = MDCLogger.getLogger(AgentHostStoreMessenger.class, "clusterId", clusterId, "storeId", storeId); + disposables.add(agentMember.receive() + .mapOptional(agentMessage -> { + try { + StoreMessage message = ZeroCopyParser.parse(agentMessage.getPayload(), StoreMessage.parser()); + KVRangeMessage payload = message.getPayload(); + if (!payload.hasHostStoreId()) { + // this is a broadcast message + message = message.toBuilder().setPayload(payload.toBuilder() + .setHostStoreId(storeId) + .build()).build(); + } + return Optional.of(message); + } catch (InvalidProtocolBufferException e) { + log.warn("Unable to parse store message", e); + return Optional.empty(); + } + }).subscribe(receiveSubject::onNext)); } static String agentId(String clusterId) { @@ -60,6 +82,10 @@ static String agentId(String clusterId) { @Override public void send(StoreMessage message) { if (message.getPayload().hasHostStoreId()) { + if (message.getPayload().getHostStoreId().equals(storeId)) { + receiveSubject.onNext(message); + return; + } agentMember.multicast(message.getPayload().getHostStoreId(), message.toByteString(), true); } else { agentMember.broadcast(message.toByteString(), true); @@ -68,28 +94,14 @@ public void send(StoreMessage message) { @Override public Observable receive() { - return agentMember.receive() - .mapOptional(agentMessage -> { - try { - StoreMessage message = ZeroCopyParser.parse(agentMessage.getPayload(), StoreMessage.parser()); - KVRangeMessage payload = message.getPayload(); - if (!payload.hasHostStoreId()) { - // this is a broadcast message - message = message.toBuilder().setPayload(payload.toBuilder() - .setHostStoreId(storeId) - .build()).build(); - } - return Optional.of(message); - } catch (InvalidProtocolBufferException e) { - log.warn("Unable to parse store message", e); - return Optional.empty(); - } - }); + 
return receiveSubject; } @Override public void close() { if (stopped.compareAndSet(false, true)) { + disposables.dispose(); + receiveSubject.onComplete(); agent.deregister(agentMember).join(); } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java index b5a2f83ab..111e75ea5 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java @@ -52,6 +52,8 @@ import org.apache.bifromq.basekv.store.proto.ReplyCode; import org.apache.bifromq.basekv.store.proto.TransferLeadershipReply; import org.apache.bifromq.basekv.store.proto.TransferLeadershipRequest; +import org.apache.bifromq.basekv.store.proto.ZombieQuitReply; +import org.apache.bifromq.basekv.store.proto.ZombieQuitRequest; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.apache.bifromq.logger.MDCLogger; import org.slf4j.Logger; @@ -150,6 +152,22 @@ public void recover(RecoverRequest request, StreamObserver respons .handle((v, e) -> RecoverReply.newBuilder().setReqId(request.getReqId()).build()), responseObserver); } + @Override + public void zombieQuit(ZombieQuitRequest request, StreamObserver responseObserver) { + response(tenantId -> kvRangeStore.quit(request.getKvRangeId()) + .thenApply(result -> ZombieQuitReply.newBuilder() + .setReqId(request.getReqId()) + .setResult(ZombieQuitReply.Result.Ok) + .setQuit(result) + .build()) + .exceptionally(e -> ZombieQuitReply.newBuilder() + .setReqId(request.getReqId()) + .setResult(e instanceof KVRangeStoreException + ? 
ZombieQuitReply.Result.NotFound : ZombieQuitReply.Result.Error) + .build()) + .handle((v, e) -> ZombieQuitReply.newBuilder().setReqId(request.getReqId()).build()), responseObserver); + } + @Override public void transferLeadership(TransferLeadershipRequest request, StreamObserver responseObserver) { @@ -232,7 +250,8 @@ public void split(KVRangeSplitRequest request, StreamObserver @Override public void merge(KVRangeMergeRequest request, StreamObserver responseObserver) { - response(tenantId -> kvRangeStore.merge(request.getVer(), request.getMergerId(), request.getMergeeId()) + response(tenantId -> kvRangeStore.merge(request.getVer(), + request.getMergerId(), request.getMergeeId(), Sets.newHashSet(request.getMergeeVotersList())) .thenApply(result -> KVRangeMergeReply.newBuilder() .setReqId(request.getReqId()) .setCode(ReplyCode.Ok) @@ -271,7 +290,8 @@ public StreamObserver linearizedQuery(StreamObserver { @@ -125,7 +132,10 @@ private void submitForExecution() { .build(); })) .thenAccept(v -> { - task.onDone.complete(v); + // complete only if not canceled concurrently + if (!task.onDone.isDone()) { + task.onDone.complete(v); + } executing.set(false); if (!requests.isEmpty()) { submitForExecution(); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/IKVRangeStore.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/IKVRangeStore.java index adbed20c4..b3ab913fe 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/IKVRangeStore.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/IKVRangeStore.java @@ -19,6 +19,12 @@ package org.apache.bifromq.basekv.store; +import com.google.protobuf.ByteString; +import io.reactivex.rxjava3.core.Observable; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; import org.apache.bifromq.basekv.proto.Boundary; import 
org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; @@ -26,12 +32,6 @@ import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; import org.apache.bifromq.basekv.store.proto.RWCoProcInput; import org.apache.bifromq.basekv.store.proto.RWCoProcOutput; -import com.google.protobuf.ByteString; -import io.reactivex.rxjava3.core.Observable; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionStage; /** * The interface of a KVRangeStore, which is responsible for hosting a KVRange. @@ -86,6 +86,14 @@ public interface IKVRangeStore { */ CompletionStage recover(KVRangeId rangeId); + /** + * Quit a zombie KVRange hosted in current store. A 'zombie' state is detected when the KVRange kept in Candidate state. + * + * @param rangeId the id of the zombie KVRange + * @return the future of the quit task + */ + CompletionStage quit(KVRangeId rangeId); + /** * The observable of the KVRangeStoreDescriptor, which is used for store discovery. * @@ -129,9 +137,10 @@ public interface IKVRangeStore { * @param ver the version of the KVRange * @param mergerId the id of the KVRange to be merged * @param mergeeId the id of the KVRange to be merged into + * @param mergeeVoters the voters of the mergee KVRange * @return the future of the task */ - CompletionStage merge(long ver, KVRangeId mergerId, KVRangeId mergeeId); + CompletionStage merge(long ver, KVRangeId mergerId, KVRangeId mergeeId, Set mergeeVoters); /** * Check if the specified key exists in the KVRange. 
diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/KVRangeMessenger.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/KVRangeMessenger.java index 999c3811c..83f51ef3e 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/KVRangeMessenger.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/KVRangeMessenger.java @@ -19,16 +19,17 @@ package org.apache.bifromq.basekv.store; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.proto.KVRangeMessage; -import org.apache.bifromq.basekv.proto.StoreMessage; -import org.apache.bifromq.basekv.store.exception.KVRangeException; -import org.apache.bifromq.basekv.store.range.IKVRangeMessenger; import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.disposables.Disposable; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.function.Predicate; +import org.apache.bifromq.base.util.CascadeCancelCompletableFuture; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.KVRangeMessage; +import org.apache.bifromq.basekv.proto.StoreMessage; +import org.apache.bifromq.basekv.store.exception.KVRangeException; +import org.apache.bifromq.basekv.store.range.IKVRangeMessenger; public class KVRangeMessenger implements IKVRangeMessenger { private final String id; @@ -53,10 +54,9 @@ public void send(KVRangeMessage message) { @Override public Observable receive() { return messenger.receive().mapOptional(storeMessage -> { - assert storeMessage.getFrom() != null; - assert storeMessage.hasSrcRange(); KVRangeMessage payload = storeMessage.getPayload(); - if (!payload.getHostStoreId().equals(id) || !payload.getRangeId().equals(rangeId)) { + if (!payload.getHostStoreId().equals(id) + || (payload.hasRangeId() && !payload.getRangeId().equals(rangeId))) { return Optional.empty(); } // swap the origin 
@@ -87,6 +87,6 @@ public CompletableFuture once(Predicate conditio }); onDone.whenComplete((v, e) -> disposable.dispose()); - return onDone; + return CascadeCancelCompletableFuture.fromRoot(onDone); } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/KVRangeStore.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/KVRangeStore.java index a3bdabe5d..32e93e134 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/KVRangeStore.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/KVRangeStore.java @@ -14,13 +14,14 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store; import static java.util.Collections.emptyList; import static org.apache.bifromq.basekv.InProcStores.regInProcStore; +import static org.apache.bifromq.basekv.proto.State.StateType.NoUse; import static org.apache.bifromq.basekv.proto.State.StateType.Normal; import static org.apache.bifromq.basekv.store.exception.KVRangeStoreException.rangeNotFound; import static org.apache.bifromq.basekv.store.util.ExecutorServiceUtil.awaitShutdown; @@ -52,6 +53,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.LongAdder; import java.util.stream.Collectors; +import javax.annotation.Nullable; import lombok.NonNull; import org.apache.bifromq.base.util.AsyncRunner; import org.apache.bifromq.baseenv.EnvProvider; @@ -67,6 +69,10 @@ import org.apache.bifromq.basekv.proto.KVRangeMessage; import org.apache.bifromq.basekv.proto.KVRangeSnapshot; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.proto.MergeDoneReply; +import org.apache.bifromq.basekv.proto.MergeReply; +import 
org.apache.bifromq.basekv.proto.PrepareMergeToReply; +import org.apache.bifromq.basekv.proto.SaveSnapshotDataRequest; import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.proto.StoreMessage; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; @@ -80,8 +86,11 @@ import org.apache.bifromq.basekv.store.proto.RWCoProcOutput; import org.apache.bifromq.basekv.store.range.IKVRange; import org.apache.bifromq.basekv.store.range.IKVRangeFSM; -import org.apache.bifromq.basekv.store.range.KVRange; import org.apache.bifromq.basekv.store.range.KVRangeFSM; +import org.apache.bifromq.basekv.store.range.KVRangeFactory; +import org.apache.bifromq.basekv.store.range.hinter.IKVRangeSplitHinter; +import org.apache.bifromq.basekv.store.range.hinter.SplitHinterContext; +import org.apache.bifromq.basekv.store.range.hinter.SplitHinterRegistry; import org.apache.bifromq.basekv.store.stats.IStatsCollector; import org.apache.bifromq.basekv.store.wal.IKVRangeWALStore; import org.apache.bifromq.basekv.store.wal.KVRangeWALStorageEngine; @@ -110,6 +119,7 @@ public class KVRangeStore implements IKVRangeStore { private final KVRangeStoreOptions opts; private final MetricsManager metricsManager; private final Map attributes; + private final SplitHinterRegistry splitHinterRegistry; private volatile ScheduledFuture tickFuture; private IStoreMessenger messenger; @@ -123,8 +133,8 @@ public KVRangeStore(String clusterId, this.clusterId = clusterId; this.coProcFactory = coProcFactory; this.opts = opts.toBuilder().build(); - this.walStorageEngine = - new KVRangeWALStorageEngine(clusterId, opts.getOverrideIdentity(), opts.getWalEngineConfigurator()); + this.walStorageEngine = new KVRangeWALStorageEngine(clusterId, opts.getOverrideIdentity(), + opts.getWalEngineType(), opts.getWalEngineConf()); id = walStorageEngine.id(); String[] tags = new String[] {"clusterId", clusterId, "storeId", id}; log = MDCLogger.getLogger(KVRangeStore.class, tags); @@ -134,7 +144,7 @@ public 
KVRangeStore(String clusterId, log.warn("KVRangeStore has been initialized with identity[{}], the override[{}] is ignored", id, opts.getOverrideIdentity()); } - kvRangeEngine = KVEngineFactory.createCPable(null, opts.getDataEngineConfigurator()); + kvRangeEngine = KVEngineFactory.createCPable(null, opts.getDataEngineType(), opts.getDataEngineConf()); this.queryExecutor = queryExecutor; this.bgTaskExecutor = bgTaskExecutor; this.tickExecutor = ExecutorServiceMetrics.monitor(Metrics.globalRegistry, @@ -148,6 +158,7 @@ public KVRangeStore(String clusterId, this.mgmtTaskRunner = new AsyncRunner(mgmtTaskExecutor); this.metricsManager = new MetricsManager(clusterId, id); this.attributes = attributes; + this.splitHinterRegistry = new SplitHinterRegistry(opts.getSplitHinterFactoryConfig(), log); storeStatsCollector = new KVRangeStoreStatsCollector(opts, Duration.ofSeconds(opts.getStatsCollectIntervalSec()), this.bgTaskExecutor); @@ -192,22 +203,23 @@ public void start(IStoreMessenger messenger) { private void loadExisting() { mgmtTaskRunner.add(() -> { - kvRangeEngine.spaces().forEach((id, keyRange) -> { + kvRangeEngine.spaces().forEach((id, kvSpace) -> { KVRangeId rangeId = KVRangeIdUtil.fromString(id); if (walStorageEngine.has(rangeId)) { IKVRangeWALStore walStore = walStorageEngine.get(rangeId); - IKVRange range = new KVRange(rangeId, keyRange); + String[] rangeTags = rangeTags(rangeId); + IKVRange range = buildKVRange(rangeId, kvSpace, null, rangeTags); // verify the integrity of wal and range state if (!validate(range, walStore)) { log.warn("Destroy inconsistent KVRange: {}", id); - keyRange.destroy(); + kvSpace.destroy(); walStore.destroy(); return; } - putAndOpen(loadKVRangeFSM(rangeId, range, walStore)).join(); + putAndOpen(loadKVRangeFSM(rangeId, range, walStore, rangeTags)).join(); } else { log.debug("Destroy orphan KVRange: {}", id); - keyRange.destroy(); + kvSpace.destroy(); } }); updateDescriptorList(); @@ -215,8 +227,8 @@ private void loadExisting() { } 
private boolean validate(IKVRange range, IKVRangeWALStore walStore) { - return range.lastAppliedIndex() <= -1 - || range.lastAppliedIndex() >= walStore.latestSnapshot().getIndex(); + long lastAppliedIndex = range.lastAppliedIndex().blockingFirst(); + return lastAppliedIndex <= -1 || lastAppliedIndex >= walStore.latestSnapshot().getIndex(); } @Override @@ -225,6 +237,7 @@ public void stop() { try { log.debug("Stopping KVRange store"); log.debug("Await for all management tasks to finish"); + tickFuture.get(); mgmtTaskRunner.awaitDone().toCompletableFuture().join(); List> closeFutures = new ArrayList<>(); try { @@ -245,7 +258,6 @@ public void stop() { descriptorListSubject.onComplete(); status.set(Status.CLOSED); status.set(Status.TERMINATING); - tickFuture.get(); } catch (Throwable e) { log.error("Error occurred during stopping range store", e); } finally { @@ -318,6 +330,18 @@ public CompletionStage recover(KVRangeId rangeId) { return CompletableFuture.failedFuture(rangeNotFound()); } + @Override + public CompletionStage quit(KVRangeId rangeId) { + checkStarted(); + RangeFSMHolder holder = kvRangeMap.get(rangeId); + if (holder != null) { + metricsManager.runningQuitNum.increment(); + return holder.fsm.quit() + .whenComplete((v, e) -> metricsManager.runningQuitNum.decrement()); + } + return CompletableFuture.failedFuture(rangeNotFound()); + } + @Override public Observable describe() { checkStarted(); @@ -344,24 +368,56 @@ public Observable describe() { private void receive(StoreMessage storeMessage) { if (status.get() == Status.STARTED) { KVRangeMessage payload = storeMessage.getPayload(); - if (payload.hasEnsureRange()) { - EnsureRange request = storeMessage.getPayload().getEnsureRange(); - mgmtTaskRunner.add(() -> { - if (status.get() != Status.STARTED) { - return CompletableFuture.completedFuture(null); - } - KVRangeId rangeId = payload.getRangeId(); - RangeFSMHolder holder = kvRangeMap.get(rangeId); - try { - Snapshot walSnapshot = request.getInitSnapshot(); - 
KVRangeSnapshot rangeSnapshot = KVRangeSnapshot.parseFrom(walSnapshot.getData()); - if (holder != null) { - // pin the range - if (holder.fsm.ver() < request.getVer()) { - log.debug("Range already exists, pinning it: rangeId={}", - KVRangeIdUtil.toString(rangeId)); - holder.pin(request.getVer(), walSnapshot, rangeSnapshot); + switch (payload.getPayloadTypeCase()) { + case ENSURERANGE -> { + EnsureRange request = storeMessage.getPayload().getEnsureRange(); + mgmtTaskRunner.add(() -> { + if (status.get() != Status.STARTED) { + return CompletableFuture.completedFuture(null); + } + KVRangeId rangeId = payload.getRangeId(); + RangeFSMHolder holder = kvRangeMap.get(rangeId); + try { + Snapshot walSnapshot = request.getInitSnapshot(); + KVRangeSnapshot rangeSnapshot = KVRangeSnapshot.parseFrom(walSnapshot.getData()); + if (holder != null) { + // pin the range + if (holder.fsm.ver() < request.getVer()) { + log.debug("Range already exists, pinning it: rangeId={}", + KVRangeIdUtil.toString(rangeId)); + holder.pin(request.getVer(), walSnapshot, rangeSnapshot); + } + messenger.send(StoreMessage.newBuilder() + .setFrom(id) + .setSrcRange(payload.getRangeId()) + .setPayload(KVRangeMessage.newBuilder() + .setRangeId(storeMessage.getSrcRange()) + .setHostStoreId(storeMessage.getFrom()) + .setEnsureRangeReply(EnsureRangeReply.newBuilder() + .setResult(EnsureRangeReply.Result.OK).build()) + .build()) + .build()); + return CompletableFuture.completedFuture(null); + } else { + return ensureRange(rangeId, walSnapshot, rangeSnapshot) + .whenComplete((v, e) -> { + updateDescriptorList(); + messenger.send(StoreMessage.newBuilder() + .setFrom(id) + .setSrcRange(payload.getRangeId()) + .setPayload(KVRangeMessage.newBuilder() + .setRangeId(storeMessage.getSrcRange()) + .setHostStoreId(storeMessage.getFrom()) + .setEnsureRangeReply(EnsureRangeReply.newBuilder() + .setResult(EnsureRangeReply.Result.OK) + .build()) + .build()) + .build()); + }); } + } catch (Throwable e) { + // should never 
happen + log.error("Unexpected error", e); messenger.send(StoreMessage.newBuilder() .setFrom(id) .setSrcRange(payload.getRangeId()) @@ -369,44 +425,93 @@ private void receive(StoreMessage storeMessage) { .setRangeId(storeMessage.getSrcRange()) .setHostStoreId(storeMessage.getFrom()) .setEnsureRangeReply(EnsureRangeReply.newBuilder() - .setResult(EnsureRangeReply.Result.OK).build()) + .setResult(EnsureRangeReply.Result.Error) + .build()) .build()) .build()); return CompletableFuture.completedFuture(null); - } else { - return ensureRange(rangeId, walSnapshot, rangeSnapshot) - .whenComplete((v, e) -> { - updateDescriptorList(); - messenger.send(StoreMessage.newBuilder() - .setFrom(id) - .setSrcRange(payload.getRangeId()) - .setPayload(KVRangeMessage.newBuilder() - .setRangeId(storeMessage.getSrcRange()) - .setHostStoreId(storeMessage.getFrom()) - .setEnsureRangeReply(EnsureRangeReply.newBuilder() - .setResult(EnsureRangeReply.Result.OK) - .build()) - .build()) - .build()); - }); } - } catch (Throwable e) { - // should never happen - log.error("Unexpected error", e); - messenger.send(StoreMessage.newBuilder() - .setFrom(id) - .setSrcRange(payload.getRangeId()) - .setPayload(KVRangeMessage.newBuilder() - .setRangeId(storeMessage.getSrcRange()) - .setHostStoreId(storeMessage.getFrom()) - .setEnsureRangeReply(EnsureRangeReply.newBuilder() - .setResult(EnsureRangeReply.Result.Error) + }); + } + case PREPAREMERGETOREQUEST -> { + KVRangeId mergeeRangeId = payload.getRangeId(); + mgmtTaskRunner.add(() -> { + if (!kvRangeMap.containsKey(mergeeRangeId)) { + messenger.send(StoreMessage.newBuilder() + .setFrom(id) + .setSrcRange(mergeeRangeId) // although it's not existing + .setPayload(KVRangeMessage.newBuilder() + .setRangeId(payload.getPrepareMergeToRequest().getId()) // the merger + .setHostStoreId(storeMessage.getFrom()) + .setPrepareMergeToReply(PrepareMergeToReply.newBuilder() + .setTaskId(payload.getPrepareMergeToRequest().getTaskId()) + .build()) + // do not set 'accept' 
field means the merger is not found + .build()) + .build()); + } + }); + } + case MERGEREQUEST -> { + KVRangeId mergerRangeId = payload.getRangeId(); + mgmtTaskRunner.add(() -> { + if (!kvRangeMap.containsKey(mergerRangeId)) { + messenger.send(StoreMessage.newBuilder() + .setFrom(id) + .setSrcRange(mergerRangeId) // although it's not existing + .setPayload(KVRangeMessage.newBuilder() + .setRangeId(payload.getMergeRequest().getMergeeId()) // the mergee + .setHostStoreId(storeMessage.getFrom()) + .setMergeReply(MergeReply.newBuilder() + .setTaskId(payload.getMergeRequest().getTaskId()) + // do not set 'accept' field means the merger is not found + .build()) .build()) - .build()) - .build()); - return CompletableFuture.completedFuture(null); - } - }); + .build()); + } + }); + } + case MERGEDONEREQUEST -> { + KVRangeId mergeeRangeId = payload.getRangeId(); + mgmtTaskRunner.add(() -> { + if (!kvRangeMap.containsKey(mergeeRangeId)) { + messenger.send(StoreMessage.newBuilder() + .setFrom(id) + .setSrcRange(mergeeRangeId) // although it's not existing + .setPayload(KVRangeMessage.newBuilder() + .setRangeId(payload.getMergeDoneRequest().getId()) // the merger + .setHostStoreId(storeMessage.getFrom()) + .setMergeDoneReply(MergeDoneReply.newBuilder() + .setTaskId(payload.getMergeDoneRequest().getTaskId()) + // do not set 'accept' field means the mergee is not found + .build()) + .build()) + .build()); + } + }); + } + case DATAMERGEREQUEST -> { + KVRangeId mergeeRangeId = payload.getRangeId(); + mgmtTaskRunner.add(() -> { + if (!kvRangeMap.containsKey(mergeeRangeId)) { + messenger.send(StoreMessage.newBuilder() + .setFrom(id) + .setSrcRange(mergeeRangeId) // although it's not existing + .setPayload(KVRangeMessage.newBuilder() + .setRangeId(payload.getDataMergeRequest().getMergerId()) // the merger + .setHostStoreId(storeMessage.getFrom()) + .setSaveSnapshotDataRequest(SaveSnapshotDataRequest.newBuilder() + .setSessionId(payload.getDataMergeRequest().getSessionId()) + 
.setFlag(SaveSnapshotDataRequest.Flag.NotFound) + .build()) + .build()) + .build()); + } + }); + } + default -> { + // ignore other messages + } } } } @@ -449,12 +554,13 @@ public CompletionStage split(long ver, KVRangeId rangeId, ByteString split } @Override - public CompletionStage merge(long ver, KVRangeId mergerId, KVRangeId mergeeId) { + public CompletionStage merge(long ver, KVRangeId mergerId, KVRangeId mergeeId, Set mergeeVoters) { checkStarted(); RangeFSMHolder holder = kvRangeMap.get(mergerId); if (holder != null) { metricsManager.runningMergeNum.increment(); - return holder.fsm.merge(ver, mergeeId).whenComplete((v, e) -> metricsManager.runningMergeNum.decrement()); + return holder.fsm.merge(ver, mergeeId, mergeeVoters) + .whenComplete((v, e) -> metricsManager.runningMergeNum.decrement()); } return CompletableFuture.failedFuture(rangeNotFound()); } @@ -526,6 +632,9 @@ public CompletionStage mutateCoProc(long ver, KVRangeId id, RWCo } private void scheduleTick(long delayInMS) { + if (status.get() != Status.STARTED && status.get() != Status.CLOSING) { + return; + } tickFuture = tickExecutor.schedule(this::tick, delayInMS, TimeUnit.MILLISECONDS); } @@ -545,8 +654,8 @@ private void tick() { private CompletableFuture ensureRange(KVRangeId rangeId, Snapshot walSnapshot, KVRangeSnapshot rangeSnapshot) { - ICPableKVSpace keyRange = kvRangeEngine.spaces().get(KVRangeIdUtil.toString(rangeId)); - if (keyRange == null) { + ICPableKVSpace kvSpace = kvRangeEngine.spaces().get(KVRangeIdUtil.toString(rangeId)); + if (kvSpace == null) { if (walStorageEngine.has(rangeId)) { log.warn("Destroy staled WALStore: rangeId={}", KVRangeIdUtil.toString(rangeId)); walStorageEngine.get(rangeId).destroy(); @@ -558,34 +667,54 @@ private CompletableFuture ensureRange(KVRangeId rangeId, Snapshot walSnaps if (walStore == null) { walStore = walStorageEngine.create(rangeId, walSnapshot); } - return putAndOpen(loadKVRangeFSM(rangeId, new KVRange(rangeId, keyRange), walStore)); + return 
putAndOpen( + loadKVRangeFSM(rangeId, KVRangeFactory.create(rangeId, kvSpace), walStore, rangeTags(rangeId))); } } - private void quitKVRange(IKVRangeFSM range) { + private void quitKVRange(IKVRangeFSM range, boolean reset) { if (status.get() != Status.STARTED) { return; } - CompletableFuture> afterDestroyed = mgmtTaskRunner.add(() -> { + CompletableFuture> afterDestroyed = mgmtTaskRunner.add(() -> { if (status.get() != Status.STARTED) { return CompletableFuture.failedFuture(new KVRangeStoreException("Not started")); } RangeFSMHolder holder = kvRangeMap.remove(range.id()); assert holder.fsm == range; + KVRangeId id = range.id(); long ver = range.ver(); + Boundary boundary = range.boundary(); log.debug("Destroy kvrange: rangeId={}", KVRangeIdUtil.toString(range.id())); return range.destroy() .thenApply(v -> { if (holder.pinned != null && holder.pinned.ver > ver) { return Optional.of(holder.pinned); } + if (reset) { + KVRangeSnapshot fsmSnapshot = KVRangeSnapshot.newBuilder() + .setVer(0) + .setId(id) + .setLastAppliedIndex(0) + .setBoundary(boundary) + .setState(State.newBuilder().setType(NoUse).build()) + .setClusterConfig(ClusterConfig.getDefaultInstance()) + .build(); + Snapshot walSnapshot = Snapshot.newBuilder() + .setTerm(0) + .setIndex(0) + .setClusterConfig(ClusterConfig.getDefaultInstance()) // empty voter set + .setData(fsmSnapshot.toByteString()) + .build(); + return Optional.of(new PinnedRange(ver, walSnapshot, fsmSnapshot)); + } return Optional.empty(); }); }); afterDestroyed.thenCompose(pinned -> mgmtTaskRunner.add(() -> { if (pinned.isPresent()) { - RangeFSMHolder.PinnedRange pinnedRange = pinned.get(); + PinnedRange pinnedRange = pinned.get(); log.debug("Recreate range after destroy: rangeId={}", KVRangeIdUtil.toString(range.id())); return ensureRange(range.id(), pinnedRange.walSnapshot, pinnedRange.fsmSnapshot); } @@ -594,33 +723,65 @@ private void quitKVRange(IKVRangeFSM range) { .thenAccept(v -> updateDescriptorList()); } - private IKVRangeFSM 
loadKVRangeFSM(KVRangeId rangeId, IKVRange range, IKVRangeWALStore walStore) { + private IKVRangeFSM loadKVRangeFSM(KVRangeId rangeId, IKVRange range, IKVRangeWALStore walStore, String... tags) { log.debug("Load existing kvrange: rangeId={}", KVRangeIdUtil.toString(rangeId)); - return new KVRangeFSM(clusterId, - id, - rangeId, - coProcFactory, - range, - walStore, - queryExecutor, - bgTaskExecutor, - opts.getKvRangeOptions(), - this::quitKVRange); + return buildKVRangeFSM(rangeId, range, walStore, tags); } private IKVRangeFSM createKVRangeFSM(KVRangeId rangeId, Snapshot snapshot, KVRangeSnapshot rangeSnapshot) { log.debug("Creating new kvrange: rangeId={}", KVRangeIdUtil.toString(rangeId)); + String[] rangeTags = rangeTags(rangeId); IKVRangeWALStore walStore = walStorageEngine.create(rangeId, snapshot); - return new KVRangeFSM(clusterId, + ICPableKVSpace kvSpace = kvRangeEngine.createIfMissing(KVRangeIdUtil.toString(rangeId)); + IKVRange kvRange = buildKVRange(rangeId, kvSpace, rangeSnapshot, rangeTags); + return buildKVRangeFSM(rangeId, kvRange, walStore, rangeTags); + } + + private IKVRange buildKVRange(KVRangeId rangeId, + ICPableKVSpace kvSpace, + @Nullable KVRangeSnapshot snapshot, + String... tags) { + if (snapshot == null) { + return KVRangeFactory.create(rangeId, kvSpace, tags); + } else { + return KVRangeFactory.create(rangeId, kvSpace, snapshot, tags); + } + } + + private String[] rangeTags(KVRangeId rangeId) { + return new String[] { + "clusterId", clusterId, + "storeId", id, + "rangeId", KVRangeIdUtil.toString(rangeId) + }; + } + + private IKVRangeFSM buildKVRangeFSM(KVRangeId rangeId, + IKVRange kvRange, + IKVRangeWALStore walStore, + String... 
tags) { + List hinters = splitHinterRegistry.createHinters( + SplitHinterContext.builder() + .clusterId(clusterId) + .storeId(id) + .id(rangeId) + .readerProvider(kvRange::newReader) + .tags(tags) + .build()); + return new KVRangeFSM( + clusterId, id, rangeId, coProcFactory, - new KVRange(rangeId, kvRangeEngine.createIfMissing(KVRangeIdUtil.toString(rangeId)), rangeSnapshot), + kvRange, walStore, queryExecutor, bgTaskExecutor, opts.getKvRangeOptions(), - this::quitKVRange); + hinters, + this::quitKVRange, + tags); + } private void updateDescriptorList() { @@ -648,6 +809,10 @@ private enum Status { TERMINATED // resource released } + record PinnedRange(long ver, Snapshot walSnapshot, KVRangeSnapshot fsmSnapshot) { + + } + private static class RangeFSMHolder { private final IKVRangeFSM fsm; private PinnedRange pinned; @@ -662,10 +827,6 @@ void pin(long ver, Snapshot walSnapshot, KVRangeSnapshot fsmSnapshot) { this.pinned = new PinnedRange(ver, walSnapshot, fsmSnapshot); } } - - record PinnedRange(long ver, Snapshot walSnapshot, KVRangeSnapshot fsmSnapshot) { - - } } private static class MetricsManager { @@ -674,6 +835,7 @@ private static class MetricsManager { private final LongAdder runningSplitNum; private final LongAdder runningMergeNum; private final LongAdder runningRecoverNum; + private final LongAdder runningQuitNum; private final LongAdder runningQueryNum; private final LongAdder runningMutateNum; @@ -689,6 +851,8 @@ private static class MetricsManager { Metrics.gauge("basekv.store.running", tags.and("cmd", "merge"), new LongAdder()); runningRecoverNum = Metrics.gauge("basekv.store.running", tags.and("cmd", "recover"), new LongAdder()); + runningQuitNum = + Metrics.gauge("basekv.store.running", tags.and("cmd", "quit"), new LongAdder()); runningQueryNum = Metrics.gauge("basekv.store.running", tags.and("cmd", "query"), new LongAdder()); runningMutateNum = diff --git 
a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/KVRangeStoreStatsCollector.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/KVRangeStoreStatsCollector.java index bdf2b21ff..52e006024 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/KVRangeStoreStatsCollector.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/KVRangeStoreStatsCollector.java @@ -19,16 +19,17 @@ package org.apache.bifromq.basekv.store; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngineConfigurator; -import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; -import org.apache.bifromq.basekv.store.stats.StatsCollector; -import org.apache.bifromq.basekv.store.util.ProcessUtil; +import static org.apache.bifromq.basekv.localengine.StructUtil.strVal; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; + import java.io.File; import java.time.Duration; import java.util.Map; import java.util.concurrent.Executor; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; +import org.apache.bifromq.basekv.store.stats.StatsCollector; +import org.apache.bifromq.basekv.store.util.ProcessUtil; @Slf4j class KVRangeStoreStatsCollector extends StatsCollector { @@ -42,9 +43,9 @@ class KVRangeStoreStatsCollector extends StatsCollector { @Override protected void scrap(Map stats) { - if (opt.getDataEngineConfigurator() instanceof RocksDBCPableKVEngineConfigurator conf) { + if ("rocksdb".equalsIgnoreCase(opt.getDataEngineType())) { try { - File dbRootDir = new File(conf.dbRootDir()); + File dbRootDir = new File(strVal(opt.getDataEngineConf(), DB_ROOT_DIR)); long total = dbRootDir.getTotalSpace(); if (total > 0) { stats.put("db.usage", roundUsage(dbRootDir.getUsableSpace() / (double) 
total)); @@ -53,9 +54,9 @@ protected void scrap(Map stats) { log.error("Failed to calculate db usage", e); } } - if (opt.getWalEngineConfigurator() instanceof RocksDBWALableKVEngineConfigurator conf) { + if ("rocksdb".equalsIgnoreCase(opt.getWalEngineType())) { try { - File walRootDir = new File(conf.dbRootDir()); + File walRootDir = new File(strVal(opt.getDataEngineConf(), DB_ROOT_DIR)); long total = walRootDir.getTotalSpace(); if (total > 0) { stats.put("wal.usage", roundUsage(walRootDir.getUsableSpace() / (double) total)); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVCloseableReader.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVCloseableReader.java deleted file mode 100644 index c248f32a8..000000000 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVCloseableReader.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.store.api; - -public interface IKVCloseableReader extends IKVReader, AutoCloseable { - @Override - void close(); -} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVReader.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVReader.java deleted file mode 100644 index 4dbaa8bae..000000000 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVReader.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.store.api; - -import org.apache.bifromq.basekv.proto.Boundary; -import com.google.protobuf.ByteString; -import java.util.Optional; - -public interface IKVReader { - Boundary boundary(); - - long size(Boundary boundary); - - boolean exist(ByteString key); - - Optional get(ByteString key); - - IKVIterator iterator(); - - void refresh(); -} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/option/KVRangeOptions.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/option/KVRangeOptions.java index fd4aafd79..0f5d5b218 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/option/KVRangeOptions.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/option/KVRangeOptions.java @@ -40,7 +40,7 @@ public class KVRangeOptions { @Builder.Default private int snapshotSyncBytesPerSec = 128 * 1024 * 1024; // 128MB @Builder.Default - private int compactWALThreshold = 10000; // the max number of logs before compaction + private int compactWALThreshold = 256 * 1024 * 1024; // the max log bytes before compaction @Builder.Default private int shrinkWALCheckIntervalSec = 60; @Builder.Default @@ -54,6 +54,8 @@ public class KVRangeOptions { @Builder.Default private int zombieTimeoutSec = 60; // 1min @Builder.Default + private int mergeTimeoutSec = 60; // 1min + @Builder.Default private RaftConfig walRaftConfig = new RaftConfig() .setPreVote(true) .setHeartbeatTimeoutTick(5) // 500ms diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/option/KVRangeStoreOptions.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/option/KVRangeStoreOptions.java index 6e6e278b8..e61dcd14c 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/option/KVRangeStoreOptions.java +++ 
b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/option/KVRangeStoreOptions.java @@ -14,17 +14,18 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.option; -import org.apache.bifromq.basekv.localengine.ICPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.IWALableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngineConfigurator; -import org.apache.bifromq.basekv.store.util.ProcessUtil; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; + +import com.google.protobuf.Struct; import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; @@ -33,6 +34,11 @@ import lombok.Setter; import lombok.ToString; import lombok.experimental.Accessors; +import org.apache.bifromq.basehookloader.BaseHookLoader; +import org.apache.bifromq.basekv.localengine.StructUtil; +import org.apache.bifromq.basekv.localengine.spi.IKVEngineProvider; +import org.apache.bifromq.basekv.store.util.ProcessUtil; + @Accessors(chain = true) @Getter @@ -48,17 +54,49 @@ public class KVRangeStoreOptions { @Builder.Default private int statsCollectIntervalSec = 5; + // Struct-only engine spec + @Builder.Default + private String dataEngineType = "rocksdb"; @Builder.Default - private ICPableKVEngineConfigurator dataEngineConfigurator = RocksDBCPableKVEngineConfigurator.builder() - .dbRootDir(Paths.get(System.getProperty("java.io.tmpdir"), "basekv", - ProcessUtil.processId(), 
"data").toString()) - .dbCheckpointRootDir(Paths.get(System.getProperty("java.io.tmpdir"), "basekvcp", - ProcessUtil.processId(), "data").toString()) - .build(); + private Struct dataEngineConf = defaultDataConf(); @Builder.Default - private IWALableKVEngineConfigurator walEngineConfigurator = RocksDBWALableKVEngineConfigurator.builder() - .dbRootDir(Paths.get(System.getProperty("java.io.tmpdir"), "basekv", - ProcessUtil.processId(), "wal").toString()) - .build(); + private String walEngineType = "rocksdb"; + @Builder.Default + private Struct walEngineConf = defaultWalConf(); + + @Builder.Default + private Map splitHinterFactoryConfig = new HashMap<>(); + + private static Struct defaultDataConf() { + // use provider defaults and set temp dirs + IKVEngineProvider provider = findProvider("rocksdb"); + Struct.Builder b = provider.defaultsForCPable().toBuilder(); + String dbRoot = Paths.get(System.getProperty("java.io.tmpdir"), "basekv", ProcessUtil.processId(), "data") + .toString(); + String cpRoot = Paths.get(System.getProperty("java.io.tmpdir"), "basekvcp", ProcessUtil.processId(), "data") + .toString(); + b.putFields(DB_ROOT_DIR, StructUtil.toValue(dbRoot)); + b.putFields(DB_CHECKPOINT_ROOT_DIR, StructUtil.toValue(cpRoot)); + return b.build(); + } + + private static Struct defaultWalConf() { + IKVEngineProvider provider = findProvider("rocksdb"); + Struct.Builder b = provider.defaultsForWALable().toBuilder(); + String dbRoot = Paths.get(System.getProperty("java.io.tmpdir"), "basekv", ProcessUtil.processId(), "wal") + .toString(); + b.putFields(DB_ROOT_DIR, StructUtil.toValue(dbRoot)); + return b.build(); + } + + private static IKVEngineProvider findProvider(String type) { + Map providers = BaseHookLoader.load(IKVEngineProvider.class); + for (IKVEngineProvider p : providers.values()) { + if (p.type().equalsIgnoreCase(type)) { + return p; + } + } + throw new IllegalArgumentException("Unsupported storage engine type: " + type); + } } diff --git 
a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/AbstractKVRangeMetadata.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/AbstractKVRangeMetadata.java deleted file mode 100644 index df991a3cd..000000000 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/AbstractKVRangeMetadata.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.store.range; - -import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_LAST_APPLIED_INDEX_BYTES; - -import org.apache.bifromq.basekv.localengine.IKVSpaceMetadata; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.proto.State; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.store.api.IKVRangeMetadata; -import org.apache.bifromq.basekv.store.util.KVUtil; -import com.google.protobuf.ByteString; -import lombok.SneakyThrows; - -abstract class AbstractKVRangeMetadata implements IKVRangeMetadata { - protected final KVRangeId id; - private final IKVSpaceMetadata keyRangeMetadata; - - AbstractKVRangeMetadata(KVRangeId id, IKVSpaceMetadata keyRangeMetadata) { - this.id = id; - this.keyRangeMetadata = keyRangeMetadata; - } - - @Override - public final KVRangeId id() { - return id; - } - - protected long version(ByteString versionBytes) { - if (versionBytes != null) { - return KVUtil.toLongNativeOrder(versionBytes); - } - return -1L; - } - - @SneakyThrows - protected State state(ByteString stateBytes) { - if (stateBytes != null) { - return State.parseFrom(stateBytes); - } - return State.newBuilder().setType(State.StateType.NoUse).build(); - } - - @Override - public final long lastAppliedIndex() { - return keyRangeMetadata.metadata(METADATA_LAST_APPLIED_INDEX_BYTES).map(KVUtil::toLong).orElse(-1L); - } - - @SneakyThrows - protected ClusterConfig clusterConfig(ByteString clusterConfigBytes) { - if (clusterConfigBytes != null) { - return ClusterConfig.parseFrom(clusterConfigBytes); - } - return ClusterConfig.getDefaultInstance(); - } - - @SneakyThrows - protected Boundary boundary(ByteString boundaryBytes) { - if (boundaryBytes != null) { - return Boundary.parseFrom(boundaryBytes); - } - return Boundary.getDefaultInstance(); - } - - @Override - public final long size() { - return 
keyRangeMetadata.size(); - } - - @Override - public final long size(Boundary boundary) { - return keyRangeMetadata.size(boundary); - } -} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/AbstractKVRangeMetadataUpdatable.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/AbstractKVRangeMetadataUpdatable.java deleted file mode 100644 index eb5bdbefd..000000000 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/AbstractKVRangeMetadataUpdatable.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.store.range; - -import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_CLUSTER_CONFIG_BYTES; -import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_LAST_APPLIED_INDEX_BYTES; -import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_RANGE_BOUND_BYTES; -import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_STATE_BYTES; -import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_VER_BYTES; -import static org.apache.bifromq.basekv.store.util.VerUtil.bump; - -import org.apache.bifromq.basekv.localengine.IKVSpaceMetadata; -import org.apache.bifromq.basekv.localengine.IKVSpaceMetadataUpdatable; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.proto.State; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.store.util.KVUtil; - -abstract class AbstractKVRangeMetadataUpdatable> - extends AbstractKVRangeMetadata implements IKVRangeMetadataUpdatable { - - AbstractKVRangeMetadataUpdatable(KVRangeId id, IKVSpaceMetadata keyRangeMetadata) { - super(id, keyRangeMetadata); - } - - @Override - public final T bumpVer(boolean boundaryChange) { - resetVer(bump(version(), boundaryChange)); - return thisT(); - } - - @Override - public final T resetVer(long ver) { - keyRangeWriter().metadata(METADATA_VER_BYTES, KVUtil.toByteStringNativeOrder(ver)); - return thisT(); - } - - @Override - public final T lastAppliedIndex(long lastAppliedIndex) { - keyRangeWriter().metadata(METADATA_LAST_APPLIED_INDEX_BYTES, KVUtil.toByteString(lastAppliedIndex)); - return thisT(); - } - - @Override - public final T boundary(Boundary boundary) { - keyRangeWriter().metadata(METADATA_RANGE_BOUND_BYTES, boundary.toByteString()); - return thisT(); - } - - @Override - public final T state(State state) { - keyRangeWriter().metadata(METADATA_STATE_BYTES, 
state.toByteString()); - return thisT(); - } - - @Override - public final T clusterConfig(ClusterConfig clusterConfig) { - keyRangeWriter().metadata(METADATA_CLUSTER_CONFIG_BYTES, clusterConfig.toByteString()); - return thisT(); - } - - @SuppressWarnings("unchecked") - private T thisT() { - return (T) this; - } - - protected abstract IKVSpaceMetadataUpdatable keyRangeWriter(); -} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVCheckpointReader.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVCheckpointReader.java deleted file mode 100644 index 0cd0c2962..000000000 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVCheckpointReader.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.store.range; - -import org.apache.bifromq.basekv.store.api.IKVReader; - -public interface IKVCheckpointReader extends IKVReader { - @Override - IKVCheckpointIterator iterator(); -} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVLoadRecorder.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVLoadRecorder.java index 81c6b2130..077d00949 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVLoadRecorder.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVLoadRecorder.java @@ -14,17 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range; -import org.apache.bifromq.basekv.store.api.IKVLoadRecord; import com.google.protobuf.ByteString; +import org.apache.bifromq.basekv.store.range.hinter.IKVLoadRecord; public interface IKVLoadRecorder { /** - * The latency spent for accessing this key + * The latency spent for accessing this key. * * @param key the accessed key * @param latencyNanos the nanos spent @@ -32,7 +32,7 @@ public interface IKVLoadRecorder { void record(ByteString key, long latencyNanos); /** - * The latency spent for other kv activity + * The latency spent for other kv activity. 
* * @param latencyNanos the nanos spent */ diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVLoadTracker.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVLoadTracker.java deleted file mode 100644 index e0fa4550f..000000000 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVLoadTracker.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.store.range; - -import com.google.protobuf.ByteString; -import java.util.Map; - -public interface IKVLoadTracker { - interface ILoadRecorder { - long startNanos(); - - /** - * Get the kv io times - * - * @return the access times to kv engine - */ - int getKVIOs(); - - /** - * Get the total time spent on io of kv engine - * - * @return the total time in nanos - */ - long getKVIONanos(); - - Map keyDistribution(); - - /** - * The latency spent for accessing this key - * - * @param key the accessed key - * @param latencyNanos the nanos spent - */ - void record(ByteString key, long latencyNanos); - - /** - * The latency spent for other kv activity - * - * @param latencyNanos the nanos spent - */ - void record(long latencyNanos); - - void stop(); - } - - /** - * Start a recorder - * - * @return the recorder - */ - ILoadRecorder start(); -} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRange.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRange.java index 42dcf2059..dd5d8a6a4 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRange.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRange.java @@ -19,22 +19,88 @@ package org.apache.bifromq.basekv.store.range; +import io.reactivex.rxjava3.core.Observable; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeSnapshot; import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVRangeReader; -import org.apache.bifromq.basekv.store.api.IKVReader; -import io.reactivex.rxjava3.core.Observable; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; + +/** + * Interface for accessing and updating a 
key-value range. + */ +public interface IKVRange extends IKVRangeIdentifiable { + + /** + * Get the observable of version. + * + * @return the observable + */ + Observable ver(); + + /** + * Get the current version. + * + * @return the current version + */ + long currentVer(); + + /** + * Get the observable of state. + * + * @return the observable + */ + Observable state(); + + /** + * Get the current state. + * + * @return the current state + */ + State currentState(); + + /** + * Get the observable of cluster config. + * + * @return the observable + */ + Observable clusterConfig(); + + /** + * Get the current cluster config. + * + * @return the current cluster config + */ + ClusterConfig currentClusterConfig(); -public interface IKVRange extends IKVRangeReader { /** - * Get the observable of metadata. + * Get the observable of boundary. * * @return the observable */ - Observable metadata(); + Observable boundary(); + + /** + * Get the current boundary. + * + * @return the current boundary + */ + Boundary currentBoundary(); + + /** + * Get the observable of last applied index. + * + * @return the observable + */ + Observable lastAppliedIndex(); + + /** + * Get the current last applied index. + * + * @return the current last applied index + */ + long currentLastAppliedIndex(); /** * Make a checkpoint of current state and return a descriptor. @@ -52,19 +118,19 @@ public interface IKVRange extends IKVRangeReader { boolean hasCheckpoint(KVRangeSnapshot checkpoint); /** - * Open an iterator for accessing the checkpoint data. + * Open a reader for accessing the checkpoint data. * * @param checkpoint the descriptor * @return the checkpoint reader */ - IKVRangeCheckpointReader open(KVRangeSnapshot checkpoint); - - IKVReader borrowDataReader(); + IKVRangeReader open(KVRangeSnapshot checkpoint); - void returnDataReader(IKVReader borrowed); - - @Override - IKVCloseableReader newDataReader(); + /** + * Create a refreshable consistent-view reader. 
+ * + * @return the reader + */ + IKVRangeRefreshableReader newReader(); /** * Get a writer for updating the range. @@ -81,19 +147,30 @@ public interface IKVRange extends IKVRangeReader { */ IKVRangeWriter toWriter(IKVLoadRecorder recorder); - IKVReseter toReseter(KVRangeSnapshot snapshot); + /** + * Open a restore session to receiving data from given snapshot. + * + * @param snapshot the snapshot + * @param progressListener the progress listener + * @return the session + */ + IKVRangeRestoreSession startRestore(KVRangeSnapshot snapshot, + IKVRangeRestoreSession.IKVRestoreProgressListener progressListener); - void close(); + /** + * The current estimated size of the KVRange. + * + * @return the size in bytes + */ + long size(); - void destroy(); + /** + * Close the KVRange. + */ + void close(); /** - * Metadata about the KVRange. - * - * @param ver the version - * @param state the state - * @param boundary the boundary + * Close and destroy the KVRange. */ - record KVRangeMeta(long ver, State state, Boundary boundary, ClusterConfig clusterConfig) { - } + void destroy(); } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeFSM.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeFSM.java index d49d95da0..a62e932ee 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeFSM.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeFSM.java @@ -14,11 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store.range; +import com.google.protobuf.ByteString; +import io.reactivex.rxjava3.core.Observable; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; @@ -26,11 +32,6 @@ import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; import org.apache.bifromq.basekv.store.proto.RWCoProcInput; import org.apache.bifromq.basekv.store.proto.RWCoProcOutput; -import com.google.protobuf.ByteString; -import io.reactivex.rxjava3.core.Observable; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.CompletableFuture; /** * The interface of the range finite state machine. @@ -71,13 +72,20 @@ public interface IKVRangeFSM { */ CompletableFuture recover(); + /** + * Trigger quit, if it's in zombie state. 
+ * + * @return a future that will be completed with true if the range is in zombie state and quit is triggered, + */ + CompletionStage quit(); + CompletableFuture transferLeadership(long ver, String newLeader); CompletableFuture changeReplicaConfig(long ver, Set newVoters, Set newLearners); CompletableFuture split(long ver, ByteString splitKey); - CompletableFuture merge(long ver, KVRangeId mergeeId); + CompletableFuture merge(long ver, KVRangeId mergeeId, Set mergeeVoters); CompletableFuture exist(long ver, ByteString key, boolean linearized); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVReseter.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeIdentifiable.java similarity index 77% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVReseter.java rename to base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeIdentifiable.java index 2a9dafc9b..e0811d333 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVReseter.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeIdentifiable.java @@ -19,14 +19,16 @@ package org.apache.bifromq.basekv.store.range; -import com.google.protobuf.ByteString; +import org.apache.bifromq.basekv.proto.KVRangeId; -public interface IKVReseter { - void put(ByteString key, ByteString value); - - void flush(); - - IKVRange abort(); - - IKVRange done(); +/** + * Interface for identifying a KVRange. + */ +public interface IKVRangeIdentifiable { + /** + * Get the id of the KVRange. 
+ * + * @return the id of the KVRange + */ + KVRangeId id(); } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeMetadataUpdatable.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeMetadataUpdatable.java index e2616ec7a..d049ab60b 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeMetadataUpdatable.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeMetadataUpdatable.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range; @@ -22,18 +22,50 @@ import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.store.api.IKVRangeMetadata; -public interface IKVRangeMetadataUpdatable> extends IKVRangeMetadata { - T bumpVer(boolean boundaryChange); - - T resetVer(long ver); +/** + * A builder-like interface to update the KVRange metadata. + * + * @param the type of the updatable + */ +interface IKVRangeMetadataUpdatable> { + /** + * Set the version to a specific value. + * + * @param ver the version to set + * @return the updatable + */ + T ver(long ver); + /** + * Set the last applied index. + * + * @param lastAppliedIndex the last applied index + * @return the updatable + */ T lastAppliedIndex(long lastAppliedIndex); + /** + * Set the boundary. + * + * @param boundary the boundary + * @return the updatable + */ T boundary(Boundary boundary); + /** + * Set the state. + * + * @param state the state + * @return the updatable + */ T state(State state); + /** + * Set the cluster configuration. 
+ * + * @param clusterConfig the cluster configuration + * @return the updatable + */ T clusterConfig(ClusterConfig clusterConfig); } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeMetricManager.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeMetricManager.java index c2650e574..577dd444e 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeMetricManager.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeMetricManager.java @@ -14,21 +14,21 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range; -import org.apache.bifromq.basekv.proto.KVRangeDescriptor; -import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; -import org.apache.bifromq.basekv.store.proto.RWCoProcOutput; import com.google.protobuf.ByteString; import io.micrometer.core.instrument.Timer; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.function.Supplier; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; +import org.apache.bifromq.basekv.store.proto.RWCoProcOutput; -public interface IKVRangeMetricManager { +interface IKVRangeMetricManager { void report(KVRangeDescriptor descriptor); void reportDump(int bytes); @@ -59,6 +59,8 @@ public interface IKVRangeMetricManager { CompletableFuture recordQueryCoProc(Supplier> supplier); + CompletableFuture recordLinearization(Supplier> supplier); + CompletableFuture recordCompact(Supplier> supplier); CompletableFuture recordLogApply(Supplier> supplier); diff --git 
a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeQueryLinearizer.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeQueryLinearizer.java index b7180a3e8..2355ede63 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeQueryLinearizer.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeQueryLinearizer.java @@ -21,7 +21,7 @@ import java.util.concurrent.CompletionStage; -public interface IKVRangeQueryLinearizer { +interface IKVRangeQueryLinearizer { CompletionStage linearize(); void afterLogApplied(long logIndex); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeMetadata.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeRestoreSession.java similarity index 56% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeMetadata.java rename to base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeRestoreSession.java index 0612f3a30..57ede1af5 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/api/IKVRangeMetadata.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeRestoreSession.java @@ -14,30 +14,39 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ -package org.apache.bifromq.basekv.store.api; +package org.apache.bifromq.basekv.store.range; +import com.google.protobuf.ByteString; import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -public interface IKVRangeMetadata { - KVRangeId id(); +public interface IKVRangeRestoreSession { + IKVRangeRestoreSession ver(long ver); - long version(); + IKVRangeRestoreSession lastAppliedIndex(long lastAppliedIndex); - State state(); + IKVRangeRestoreSession boundary(Boundary boundary); - long lastAppliedIndex(); + IKVRangeRestoreSession state(State state); - Boundary boundary(); + IKVRangeRestoreSession clusterConfig(ClusterConfig clusterConfig); - ClusterConfig clusterConfig(); + IKVRangeRestoreSession put(ByteString key, ByteString value); - long size(); + void done(); - long size(Boundary boundary); + void abort(); + + int count(); + + interface IKVRestoreProgressListener { + IKVRestoreProgressListener NOOP = (count, bytes) -> { + }; + + void onProgress(int count, long bytes); + } } diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceIterator.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeSnapshotReceiver.java similarity index 66% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceIterator.java rename to base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeSnapshotReceiver.java index e1c6e43b1..4339d3493 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/IKVSpaceIterator.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeSnapshotReceiver.java @@ -17,30 +17,23 @@ * under the License. 
*/ -package org.apache.bifromq.basekv.localengine; +package org.apache.bifromq.basekv.store.range; import com.google.protobuf.ByteString; +import java.util.concurrent.CompletableFuture; -public interface IKVSpaceIterator extends AutoCloseable { - ByteString key(); +interface IKVRangeSnapshotReceiver { - ByteString value(); + CompletableFuture start(ReceiveListener listener); - boolean isValid(); + enum Code { + TIME_OUT, NOT_FOUND, ERROR, DONE + } - void next(); + interface ReceiveListener { + void onReceive(ByteString key, ByteString value); + } - void prev(); - - void seekToFirst(); - - void seekToLast(); - - void seek(ByteString target); - - void seekForPrev(ByteString target); - - void refresh(); - - void close(); + record Result(Code code, long totalEntries, long totalBytes) { + } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeWritable.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeWritable.java index ff97c7e7c..00de2e31a 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeWritable.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeWritable.java @@ -14,19 +14,38 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store.range; +import com.google.protobuf.ByteString; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.KVRangeSnapshot; +import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.store.api.IKVWriter; -public interface IKVRangeWritable> extends IKVRangeMetadataUpdatable { - IKVRangeMetadataWriter migrateTo(KVRangeId targetRangeId, Boundary boundary); +interface IKVRangeWritable> extends IKVRangeMetadataUpdatable { + void migrateTo(KVRangeId targetRangeId, KVRangeSnapshot rightRangeSnapshot); - IKVRangeMetadataWriter migrateFrom(KVRangeId fromRangeId, Boundary boundary); + Migrater startMerging(MigrationProgressListener progressListener); IKVWriter kvWriter(); + + interface Migrater { + Migrater ver(long ver); + + Migrater state(State state); + + Migrater boundary(Boundary boundary); + + void put(ByteString key, ByteString value); + + void abort(); + } + + interface MigrationProgressListener { + void onProgress(int count, long bytes); + } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeWriter.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeWriter.java index 2ad7de5b2..052665e35 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeWriter.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/IKVRangeWriter.java @@ -19,7 +19,7 @@ package org.apache.bifromq.basekv.store.range; -public interface IKVRangeWriter> extends IKVRangeWritable { +public interface IKVRangeWriter> extends IKVRangeWritable, IKVRangeIdentifiable { void done(); void abort(); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVCheckpointReader.java 
b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVCheckpointReader.java deleted file mode 100644 index 2dce22344..000000000 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVCheckpointReader.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.store.range; - -import static org.apache.bifromq.basekv.utils.BoundaryUtil.inRange; - -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.IKVSpaceReader; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.store.api.IKVRangeReader; -import com.google.protobuf.ByteString; -import java.util.Optional; - -public class KVCheckpointReader implements IKVCheckpointReader { - private final IKVSpaceReader kvSpace; - private final IKVRangeReader kvRangeReader; - private volatile IKVSpaceIterator kvSpaceIterator; - - KVCheckpointReader(IKVSpaceReader kvSpace, IKVRangeReader reader) { - this.kvSpace = kvSpace; - this.kvRangeReader = reader; - } - - @Override - public Boundary boundary() { - return kvRangeReader.boundary(); - } - - @Override - public long size(Boundary boundary) { - assert inRange(boundary, boundary()); - return kvRangeReader.size(boundary); - } - - @Override - public boolean exist(ByteString key) { - assert inRange(key, boundary()); - return kvSpace.exist(key); - } - - @Override - public Optional get(ByteString key) { - assert inRange(key, boundary()); - return kvSpace.get(key); - } - - @Override - public IKVCheckpointIterator iterator() { - return new KVCheckpointDataIterator(kvSpace.newIterator()); - } - - @Override - public void refresh() { - } -} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVIterator.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVIterator.java index 16ed284ca..ab076943e 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVIterator.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVIterator.java @@ -14,14 +14,14 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. 
See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range; +import com.google.protobuf.ByteString; import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; import org.apache.bifromq.basekv.store.api.IKVIterator; -import com.google.protobuf.ByteString; class KVIterator implements IKVIterator { private final IKVSpaceIterator kvSpaceIterator; @@ -74,4 +74,9 @@ public void seek(ByteString key) { public void seekForPrev(ByteString key) { kvSpaceIterator.seekForPrev(key); } + + @Override + public void close() { + kvSpaceIterator.close(); + } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVLoadRecorder.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVLoadRecorder.java index 70394b157..91406f298 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVLoadRecorder.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVLoadRecorder.java @@ -14,16 +14,16 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store.range; -import org.apache.bifromq.basekv.store.api.IKVLoadRecord; import com.google.protobuf.ByteString; import java.util.HashMap; import java.util.Map; import java.util.function.Supplier; +import org.apache.bifromq.basekv.store.range.hinter.IKVLoadRecord; public class KVLoadRecorder implements IKVLoadRecorder { private final long startNanos; diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRange.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRange.java index e7c42a508..bfbfd1f09 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRange.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRange.java @@ -27,89 +27,120 @@ import com.google.protobuf.ByteString; import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.disposables.Disposable; import io.reactivex.rxjava3.subjects.BehaviorSubject; -import java.util.concurrent.ConcurrentLinkedQueue; -import lombok.Getter; -import lombok.SneakyThrows; +import java.util.Map; import org.apache.bifromq.basekv.localengine.ICPableKVSpace; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeSnapshot; import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVRangeReader; -import org.apache.bifromq.basekv.store.api.IKVReader; -import org.apache.bifromq.basekv.store.api.IKVWriter; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; +import org.apache.bifromq.basekv.store.util.KVUtil; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; -public class KVRange extends AbstractKVRangeMetadata implements IKVRange { - @Getter 
+class KVRange implements IKVRange { + private final KVRangeId id; private final ICPableKVSpace kvSpace; - private final ConcurrentLinkedQueue sharedDataReaders = new ConcurrentLinkedQueue<>(); - private final BehaviorSubject metaSubject; - - public KVRange(KVRangeId id, ICPableKVSpace kvSpace) { - super(id, kvSpace); + private final Logger logger; + private final BehaviorSubject versionSubject; + private final BehaviorSubject stateSubject; + private final BehaviorSubject clusterConfigSubject; + private final BehaviorSubject boundarySubject; + private final BehaviorSubject lastAppliedIndexSubject; + private final Disposable disposable; + + KVRange(KVRangeId id, ICPableKVSpace kvSpace, String... tags) { + this.id = id; this.kvSpace = kvSpace; - metaSubject = BehaviorSubject.createDefault( - new IKVRange.KVRangeMeta(-1L, - State.newBuilder().setType(State.StateType.NoUse).build(), - NULL_BOUNDARY, - ClusterConfig.getDefaultInstance())); - kvSpace.metadata() - .map(metadataMap -> { - long version = version(metadataMap.get(METADATA_VER_BYTES)); - State state = state(metadataMap.get(METADATA_STATE_BYTES)); - Boundary boundary = boundary(metadataMap.get(METADATA_RANGE_BOUND_BYTES)); - ClusterConfig clusterConfig = clusterConfig(metadataMap.get(METADATA_CLUSTER_CONFIG_BYTES)); - return new IKVRange.KVRangeMeta(version, state, boundary, clusterConfig); - }) - .subscribe(metaSubject); + this.logger = MDCLogger.getLogger(KVRange.class, tags); + versionSubject = BehaviorSubject.createDefault(-1L); + stateSubject = BehaviorSubject.createDefault( + State.newBuilder().setType(State.StateType.NoUse).build()); + clusterConfigSubject = BehaviorSubject.createDefault(ClusterConfig.getDefaultInstance()); + boundarySubject = BehaviorSubject.createDefault(NULL_BOUNDARY); + lastAppliedIndexSubject = BehaviorSubject.createDefault(-1L); + disposable = kvSpace.metadata().subscribe(this::onMetadataChanged); + } + + public KVRange(KVRangeId id, ICPableKVSpace kvSpace, KVRangeSnapshot 
snapshot, String... tags) { + this(id, kvSpace, tags); + startRestore(snapshot, IKVRangeRestoreSession.IKVRestoreProgressListener.NOOP).done(); + } + + @Override + public KVRangeId id() { + return id; + } + + @Override + public Observable ver() { + return versionSubject.distinctUntilChanged(); } - public KVRange(KVRangeId id, ICPableKVSpace kvSpace, KVRangeSnapshot snapshot) { - this(id, kvSpace); - toReseter(snapshot).done(); + @Override + public long currentVer() { + return versionSubject.blockingFirst(); } @Override - public final long version() { - return metaSubject.getValue().ver(); + public Observable state() { + return stateSubject.distinctUntilChanged(); } @Override - public final State state() { - return metaSubject.getValue().state(); + public State currentState() { + return stateSubject.blockingFirst(); } @Override - public final Boundary boundary() { - return metaSubject.getValue().boundary(); + public Observable clusterConfig() { + return clusterConfigSubject.distinctUntilChanged(); } @Override - public ClusterConfig clusterConfig() { - return metaSubject.getValue().clusterConfig(); + public ClusterConfig currentClusterConfig() { + return clusterConfigSubject.blockingFirst(); } @Override - public Observable metadata() { - return metaSubject; + public Observable boundary() { + return boundarySubject.distinctUntilChanged(); + } + + @Override + public Boundary currentBoundary() { + return boundarySubject.blockingFirst(); + } + + @Override + public Observable lastAppliedIndex() { + return lastAppliedIndexSubject.distinctUntilChanged(); + } + + @Override + public long currentLastAppliedIndex() { + return lastAppliedIndexSubject.blockingFirst(); } @Override public KVRangeSnapshot checkpoint() { String checkpointId = kvSpace.checkpoint(); - IKVRangeReader kvRangeCheckpoint = new KVRangeCheckpoint(id, kvSpace.openCheckpoint(checkpointId).get()); - KVRangeSnapshot.Builder builder = KVRangeSnapshot.newBuilder() - .setVer(kvRangeCheckpoint.version()) - 
.setId(id) - .setCheckpointId(checkpointId) - .setLastAppliedIndex(kvRangeCheckpoint.lastAppliedIndex()) - .setState(kvRangeCheckpoint.state()) - .setBoundary(kvRangeCheckpoint.boundary()) - .setClusterConfig(kvRangeCheckpoint.clusterConfig()); - return builder.build(); + try (IKVRangeReader checkpointReader = new KVRangeReader( + kvSpace.openCheckpoint(checkpointId).get().newReader())) { + KVRangeSnapshot.Builder builder = KVRangeSnapshot.newBuilder() + .setVer(checkpointReader.version()) + .setId(id) + .setCheckpointId(checkpointId) + .setLastAppliedIndex(checkpointReader.lastAppliedIndex()) + .setState(checkpointReader.state()) + .setBoundary(checkpointReader.boundary()) + .setClusterConfig(checkpointReader.clusterConfig()); + return builder.build(); + } } @Override @@ -119,135 +150,104 @@ public boolean hasCheckpoint(KVRangeSnapshot checkpoint) { } @Override - public IKVRangeCheckpointReader open(KVRangeSnapshot checkpoint) { - return new KVRangeCheckpoint(id, kvSpace.openCheckpoint(checkpoint.getCheckpointId()).get()); + public IKVRangeReader open(KVRangeSnapshot checkpoint) { + return new KVRangeReader(kvSpace.openCheckpoint(checkpoint.getCheckpointId()).get().newReader()); } - @SneakyThrows @Override - public final IKVReader borrowDataReader() { - IKVReader reader = sharedDataReaders.poll(); - if (reader == null) { - return newDataReader(); - } - return reader; + public IKVRangeRefreshableReader newReader() { + return new KVRangeRefreshableReader(kvSpace.reader()); } @Override - public final void returnDataReader(IKVReader borrowed) { - sharedDataReaders.add((IKVCloseableReader) borrowed); + public IKVRangeWriter toWriter() { + return new KVRangeWriter(id, kvSpace); } @Override - public IKVCloseableReader newDataReader() { - return new KVReader(kvSpace, this); + public IKVRangeWriter toWriter(IKVLoadRecorder recorder) { + return new LoadRecordableKVRangeWriter(id, kvSpace, recorder); } @Override - public IKVRangeWriter toWriter() { - return new 
KVRangeWriter(id, kvSpace.toWriter()); + public IKVRangeRestoreSession startRestore(KVRangeSnapshot snapshot, + IKVRangeRestoreSession.IKVRestoreProgressListener progressListener) { + return new KVRangeRestoreSession(kvSpace.startRestore(progressListener::onProgress)) + .ver(snapshot.getVer()) + .lastAppliedIndex(snapshot.getLastAppliedIndex()) + .state(snapshot.getState()) + .boundary(snapshot.getBoundary()) + .clusterConfig(snapshot.getClusterConfig()); } @Override - public IKVRangeWriter toWriter(IKVLoadRecorder recorder) { - return new LoadRecordableKVRangeWriter(id, kvSpace.toWriter(), recorder); - } - - @Override - public IKVReseter toReseter(KVRangeSnapshot snapshot) { - return new IKVReseter() { - private IKVRangeWriter rangeWriter; - private IKVWriter kvWriter; - private boolean closed = false; - private boolean dirty = false; - - { - IKVRangeWriter newWriter = toWriter(); - IKVWriter newKVWriter = newWriter - .resetVer(snapshot.getVer()) - .lastAppliedIndex(snapshot.getLastAppliedIndex()) - .state(snapshot.getState()) - .boundary(snapshot.getBoundary()) - .clusterConfig(snapshot.getClusterConfig()) - .kvWriter(); - newKVWriter.clear(boundary()); - rangeWriter = newWriter; - kvWriter = newKVWriter; - } - - private void initWriter() { - rangeWriter = toWriter(); - kvWriter = rangeWriter.kvWriter(); - dirty = false; - } + public long size() { + return kvSpace.size(); + } - private void rotateWriter() { - if (rangeWriter != null) { - rangeWriter.done(); - } - initWriter(); - } + @Override + public void close() { + disposable.dispose(); + versionSubject.onComplete(); + stateSubject.onComplete(); + clusterConfigSubject.onComplete(); + boundarySubject.onComplete(); + lastAppliedIndexSubject.onComplete(); + kvSpace.close(); + } - private void ensureActive() { - if (closed) { - throw new IllegalStateException("KVRange resetter already closed"); - } - } + @Override + public void destroy() { + kvSpace.destroy(); + } - @Override - public void put(ByteString key, 
ByteString value) { - ensureActive(); - kvWriter.put(key, value); - dirty = true; - } + private void onMetadataChanged(Map metadata) { + updateVersion(metadata.get(METADATA_VER_BYTES)); + updateState(metadata.get(METADATA_STATE_BYTES)); + updateClusterConfig(metadata.get(METADATA_CLUSTER_CONFIG_BYTES)); + updateBoundary(metadata.get(METADATA_RANGE_BOUND_BYTES)); + updateLastAppliedIndex(metadata.get(KVRangeKeys.METADATA_LAST_APPLIED_INDEX_BYTES)); + } - @Override - public void flush() { - ensureActive(); - if (dirty) { - rotateWriter(); - } - } + private void updateVersion(ByteString versionBytes) { + if (versionBytes != null) { + versionSubject.onNext(KVUtil.toLongNativeOrder(versionBytes)); + } + } - @Override - public IKVRange abort() { - if (!closed) { - closed = true; - if (rangeWriter != null) { - rangeWriter.abort(); - } - rangeWriter = null; - kvWriter = null; - } - return KVRange.this; + private void updateState(ByteString stateBytes) { + if (stateBytes != null) { + try { + stateSubject.onNext(State.parseFrom(stateBytes)); + } catch (Throwable e) { + logger.warn("Failed to parse state from bytes", e); } + } + } - @Override - public IKVRange done() { - if (!closed) { - closed = true; - if (rangeWriter != null) { - rangeWriter.done(); - } - rangeWriter = null; - kvWriter = null; - } - return KVRange.this; + private void updateClusterConfig(ByteString clusterConfigBytes) { + if (clusterConfigBytes != null) { + try { + clusterConfigSubject.onNext(ClusterConfig.parseFrom(clusterConfigBytes)); + } catch (Throwable e) { + logger.warn("Failed to parse cluster config from bytes", e); } - }; + } } - @Override - public void close() { - IKVCloseableReader reader; - while ((reader = sharedDataReaders.poll()) != null) { - reader.close(); + private void updateBoundary(ByteString boundaryBytes) { + if (boundaryBytes != null) { + try { + boundarySubject.onNext(Boundary.parseFrom(boundaryBytes)); + } catch (Throwable e) { + logger.warn("Failed to parse boundary from bytes", 
e); + } } - metaSubject.onComplete(); } - @Override - public void destroy() { - kvSpace.destroy(); + private void updateLastAppliedIndex(ByteString lastAppliedIndexBytes) { + if (lastAppliedIndexBytes != null) { + lastAppliedIndexSubject.onNext(KVUtil.toLong(lastAppliedIndexBytes)); + } } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeCheckpoint.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeCheckpoint.java deleted file mode 100644 index b34be1f59..000000000 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeCheckpoint.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.store.range; - -import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_CLUSTER_CONFIG_BYTES; -import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_RANGE_BOUND_BYTES; -import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_STATE_BYTES; -import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_VER_BYTES; - -import org.apache.bifromq.basekv.localengine.IKVSpaceReader; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.proto.State; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import com.google.protobuf.ByteString; -import java.util.Optional; - -public class KVRangeCheckpoint extends AbstractKVRangeMetadata implements IKVRangeCheckpointReader { - private final IKVSpaceReader keyRangeCheckpoint; - - KVRangeCheckpoint(KVRangeId id, IKVSpaceReader keyRangeCheckpoint) { - super(id, keyRangeCheckpoint); - this.keyRangeCheckpoint = keyRangeCheckpoint; - } - - @Override - public IKVCheckpointReader newDataReader() { - return new KVCheckpointReader(keyRangeCheckpoint, this); - } - - @Override - public long version() { - Optional verBytes = keyRangeCheckpoint.metadata(METADATA_VER_BYTES); - return version(verBytes.orElse(null)); - } - - @Override - public State state() { - Optional stateData = keyRangeCheckpoint.metadata(METADATA_STATE_BYTES); - return state(stateData.orElse(null)); - } - - @Override - public Boundary boundary() { - return boundary(keyRangeCheckpoint.metadata(METADATA_RANGE_BOUND_BYTES).orElse(null)); - } - - @Override - public ClusterConfig clusterConfig() { - return clusterConfig(keyRangeCheckpoint.metadata(METADATA_CLUSTER_CONFIG_BYTES).orElse(null)); - } -} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeDumpSession.java 
b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeDumpSession.java index 3a5bae74d..cc70dcd00 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeDumpSession.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeDumpSession.java @@ -24,6 +24,7 @@ import io.micrometer.core.instrument.Tags; import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics; import io.reactivex.rxjava3.disposables.Disposable; +import io.reactivex.rxjava3.schedulers.Schedulers; import java.time.Duration; import java.util.Optional; import java.util.concurrent.CompletableFuture; @@ -36,10 +37,13 @@ import org.apache.bifromq.base.util.AsyncRunner; import org.apache.bifromq.baseenv.EnvProvider; import org.apache.bifromq.basekv.proto.KVPair; +import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeMessage; +import org.apache.bifromq.basekv.proto.KVRangeSnapshot; import org.apache.bifromq.basekv.proto.SaveSnapshotDataReply; import org.apache.bifromq.basekv.proto.SaveSnapshotDataRequest; -import org.apache.bifromq.basekv.proto.SnapshotSyncRequest; +import org.apache.bifromq.basekv.store.api.IKVIterator; +import org.apache.bifromq.basekv.store.api.IKVRangeReader; import org.apache.bifromq.logger.MDCLogger; import org.slf4j.Logger; @@ -49,8 +53,10 @@ class KVRangeDumpSession { private static final double TARGET_ROUND_TRIP_NANOS = Duration.ofMillis(70).toNanos(); private static final double EMA_ALPHA = 0.2d; private final Logger log; - private final String follower; - private final SnapshotSyncRequest request; + private final String sessionId; + private final KVRangeSnapshot snapshot; + private final KVRangeId receiverRangeId; + private final String receiverStoreId; private final IKVRangeMessenger messenger; private final ExecutorService executor; private final AsyncRunner runner; @@ -61,18 +67,21 @@ class 
KVRangeDumpSession { private final DumpBytesRecorder recorder; private final SnapshotBandwidthGovernor bandwidthGovernor; private final long startDumpTS = System.nanoTime(); - private IKVCheckpointIterator snapshotDataItr; + private IKVRangeReader snapshotReader; + private IKVIterator snapshotDataItr; private long totalEntries = 0; private long totalBytes = 0; + private long lastSendTS; + private double buildTimeEwma = TARGET_ROUND_TRIP_NANOS; + private double roundTripEwma = TARGET_ROUND_TRIP_NANOS; + private int chunkHint; private volatile KVRangeMessage currentRequest; private volatile long lastReplyTS; - private volatile long lastSendTS; - private volatile double buildTimeEwma = TARGET_ROUND_TRIP_NANOS; - private volatile double roundTripEwma = TARGET_ROUND_TRIP_NANOS; - private volatile int chunkHint; - KVRangeDumpSession(String follower, - SnapshotSyncRequest request, + KVRangeDumpSession(String sessionId, + KVRangeSnapshot snapshot, + KVRangeId receiverRangeId, + String receiverStoreId, IKVRange accessor, IKVRangeMessenger messenger, Duration maxIdleDuration, @@ -80,8 +89,10 @@ class KVRangeDumpSession { SnapshotBandwidthGovernor bandwidthGovernor, DumpBytesRecorder recorder, String... 
tags) { - this.follower = follower; - this.request = request; + this.sessionId = sessionId; + this.snapshot = snapshot; + this.receiverRangeId = receiverRangeId; + this.receiverStoreId = receiverStoreId; this.messenger = messenger; this.executor = ExecutorServiceMetrics.monitor(Metrics.globalRegistry, new ThreadPoolExecutor(1, 1, @@ -94,43 +105,46 @@ class KVRangeDumpSession { this.bandwidthGovernor = bandwidthGovernor; this.chunkHint = initialChunkHint(bandwidth); this.log = MDCLogger.getLogger(KVRangeDumpSession.class, tags); - if (!request.getSnapshot().hasCheckpointId()) { + if (!snapshot.hasCheckpointId()) { messenger.send(KVRangeMessage.newBuilder() - .setRangeId(request.getSnapshot().getId()) - .setHostStoreId(follower) + .setRangeId(receiverRangeId) + .setHostStoreId(receiverStoreId) .setSaveSnapshotDataRequest(SaveSnapshotDataRequest.newBuilder() - .setSessionId(request.getSessionId()) + .setSessionId(sessionId) .setFlag(SaveSnapshotDataRequest.Flag.End) .build()) .build()); executor.execute(() -> doneSignal.complete(Result.OK)); - } else if (!accessor.hasCheckpoint(request.getSnapshot())) { - log.warn("No checkpoint found for snapshot: {}", request.getSnapshot()); + } else if (!accessor.hasCheckpoint(snapshot)) { + log.warn("No checkpoint found for snapshot: {}", snapshot); messenger.send(KVRangeMessage.newBuilder() - .setRangeId(request.getSnapshot().getId()) - .setHostStoreId(follower) + .setRangeId(receiverRangeId) + .setHostStoreId(receiverStoreId) .setSaveSnapshotDataRequest(SaveSnapshotDataRequest.newBuilder() - .setSessionId(request.getSessionId()) - .setFlag(SaveSnapshotDataRequest.Flag.Error) + .setSessionId(sessionId) + .setFlag(SaveSnapshotDataRequest.Flag.NotFound) .build()) .build()); executor.execute(() -> doneSignal.complete(Result.NoCheckpoint)); } else { - snapshotDataItr = accessor.open(request.getSnapshot()).newDataReader().iterator(); + snapshotReader = accessor.open(snapshot); + snapshotDataItr = snapshotReader.iterator(); 
snapshotDataItr.seekToFirst(); Disposable disposable = messenger.receive() .mapOptional(m -> { if (m.hasSaveSnapshotDataReply()) { SaveSnapshotDataReply reply = m.getSaveSnapshotDataReply(); - if (reply.getSessionId().equals(request.getSessionId())) { + if (reply.getSessionId().equals(sessionId)) { return Optional.of(reply); } } return Optional.empty(); }) + .observeOn(Schedulers.from(executor)) .subscribe(this::handleReply); doneSignal.whenComplete((v, e) -> { snapshotDataItr.close(); + snapshotReader.close(); disposable.dispose(); }); nextSaveRequest(); @@ -138,11 +152,11 @@ class KVRangeDumpSession { } String id() { - return request.getSessionId(); + return sessionId; } String checkpointId() { - return request.getSnapshot().getCheckpointId(); + return snapshot.getCheckpointId(); } void tick() { @@ -151,7 +165,7 @@ void tick() { } long elapseNanos = Duration.ofNanos(System.nanoTime() - lastReplyTS).toNanos(); if (maxIdleDuration.toNanos() < elapseNanos) { - log.debug("DumpSession idle: session={}, follower={}", request.getSessionId(), follower); + log.debug("DumpSession idle: session={}, follower={}", sessionId, receiverStoreId); cancel(); } else if (maxIdleDuration.toNanos() / 2 < elapseNanos && currentRequest != null) { runner.add(() -> { @@ -164,6 +178,14 @@ void tick() { void cancel() { if (canceled.compareAndSet(false, true)) { + messenger.send(KVRangeMessage.newBuilder() + .setRangeId(receiverRangeId) + .setHostStoreId(receiverStoreId) + .setSaveSnapshotDataRequest(SaveSnapshotDataRequest.newBuilder() + .setSessionId(sessionId) + .setFlag(SaveSnapshotDataRequest.Flag.Error) + .build()) + .build()); runner.add(() -> doneSignal.complete(Result.Canceled)); } } @@ -206,7 +228,7 @@ private void handleReply(SaveSnapshotDataReply reply) { private void nextSaveRequest() { runner.add(() -> { SaveSnapshotDataRequest.Builder reqBuilder = SaveSnapshotDataRequest.newBuilder() - .setSessionId(request.getSessionId()) + .setSessionId(sessionId) 
.setReqId(reqId.getAndIncrement()); long buildStart = System.nanoTime(); int dumpEntries = 0; @@ -235,13 +257,13 @@ private void nextSaveRequest() { snapshotDataItr.next(); } } catch (Throwable e) { - log.error("DumpSession error: session={}, follower={}", request.getSessionId(), follower, e); + log.error("DumpSession error: session={}, follower={}", sessionId, receiverStoreId, e); reqBuilder.clearKv(); reqBuilder.setFlag(SaveSnapshotDataRequest.Flag.Error); } } if (canceled.get() && reqBuilder.getFlag() != SaveSnapshotDataRequest.Flag.Error) { - log.debug("DumpSession has been canceled: session={}, follower={}", request.getSessionId(), follower); + log.debug("DumpSession has been canceled: session={}, follower={}", sessionId, receiverStoreId); reqBuilder.clearKv(); reqBuilder.setFlag(SaveSnapshotDataRequest.Flag.Error); } @@ -264,8 +286,8 @@ private void nextSaveRequest() { adjustChunkHint(buildCost); } currentRequest = KVRangeMessage.newBuilder() - .setRangeId(request.getSnapshot().getId()) - .setHostStoreId(follower) + .setRangeId(receiverRangeId) + .setHostStoreId(receiverStoreId) .setSaveSnapshotDataRequest(reqBuilder.build()) .build(); long now = System.nanoTime(); @@ -277,11 +299,11 @@ private void nextSaveRequest() { if (reqBuilder.getFlag() == SaveSnapshotDataRequest.Flag.End) { log.info( "Dump snapshot completed: sessionId={}, follower={}, totalEntries={}, totalBytes={}, cost={}ms", - request.getSessionId(), follower, totalEntries, totalBytes, + sessionId, receiverStoreId, totalEntries, totalBytes, TimeUnit.NANOSECONDS.toMillis(now - startDumpTS)); } else { log.info("Dump snapshot data: sessionId={}, follower={}, entries={}, bytes={}", - request.getSessionId(), follower, reqBuilder.getKvCount(), dumpBytes); + sessionId, receiverStoreId, reqBuilder.getKvCount(), dumpBytes); } messenger.send(currentRequest); if (currentRequest.getSaveSnapshotDataRequest().getFlag() == SaveSnapshotDataRequest.Flag.Error) { diff --git 
a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java index 58fc2e162..1a9b609db 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java @@ -42,35 +42,44 @@ import static org.apache.bifromq.basekv.store.range.KVRangeFSM.Lifecycle.Open; import static org.apache.bifromq.basekv.store.util.ExecutorServiceUtil.awaitShutdown; import static org.apache.bifromq.basekv.store.util.VerUtil.boundaryCompatible; +import static org.apache.bifromq.basekv.store.util.VerUtil.bump; import static org.apache.bifromq.basekv.store.util.VerUtil.print; import static org.apache.bifromq.basekv.utils.BoundaryUtil.NULL_BOUNDARY; import static org.apache.bifromq.basekv.utils.BoundaryUtil.canCombine; import static org.apache.bifromq.basekv.utils.BoundaryUtil.combine; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.endKey; import static org.apache.bifromq.basekv.utils.BoundaryUtil.isSplittable; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.startKey; +import com.google.common.base.Preconditions; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import com.google.protobuf.Any; import com.google.protobuf.ByteString; -import com.google.protobuf.InvalidProtocolBufferException; import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.Tags; import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics; import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.core.ObservableSource; import io.reactivex.rxjava3.disposables.CompositeDisposable; +import io.reactivex.rxjava3.disposables.Disposable; import io.reactivex.rxjava3.schedulers.Schedulers; import io.reactivex.rxjava3.subjects.BehaviorSubject; import 
io.reactivex.rxjava3.subjects.Subject; import java.time.Duration; -import java.util.HashSet; +import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executor; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedTransferQueue; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -80,6 +89,7 @@ import java.util.concurrent.locks.StampedLock; import java.util.function.Supplier; import java.util.stream.Collectors; +import org.apache.bifromq.base.util.AsyncRetry; import org.apache.bifromq.base.util.AsyncRunner; import org.apache.bifromq.baseenv.EnvProvider; import org.apache.bifromq.baseenv.ZeroCopyParser; @@ -89,6 +99,7 @@ import org.apache.bifromq.basekv.proto.CancelMergingReply; import org.apache.bifromq.basekv.proto.CancelMergingRequest; import org.apache.bifromq.basekv.proto.ChangeConfig; +import org.apache.bifromq.basekv.proto.DataMergeRequest; import org.apache.bifromq.basekv.proto.Delete; import org.apache.bifromq.basekv.proto.EnsureRange; import org.apache.bifromq.basekv.proto.EnsureRangeReply; @@ -101,6 +112,7 @@ import org.apache.bifromq.basekv.proto.MergeDone; import org.apache.bifromq.basekv.proto.MergeDoneReply; import org.apache.bifromq.basekv.proto.MergeDoneRequest; +import org.apache.bifromq.basekv.proto.MergeHelpRequest; import org.apache.bifromq.basekv.proto.MergeReply; import org.apache.bifromq.basekv.proto.MergeRequest; import org.apache.bifromq.basekv.proto.PrepareMergeTo; @@ -119,18 +131,20 @@ import org.apache.bifromq.basekv.raft.proto.LogEntry; import org.apache.bifromq.basekv.raft.proto.RaftMessage; import 
org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; +import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; import org.apache.bifromq.basekv.raft.proto.Snapshot; -import org.apache.bifromq.basekv.store.api.IKVLoadRecord; import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; import org.apache.bifromq.basekv.store.api.IKVRangeCoProcFactory; -import org.apache.bifromq.basekv.store.api.IKVRangeSplitHinter; -import org.apache.bifromq.basekv.store.api.IKVReader; +import org.apache.bifromq.basekv.store.api.IKVRangeReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.basekv.store.exception.KVRangeException; import org.apache.bifromq.basekv.store.option.KVRangeOptions; import org.apache.bifromq.basekv.store.proto.ROCoProcInput; import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; import org.apache.bifromq.basekv.store.proto.RWCoProcInput; import org.apache.bifromq.basekv.store.proto.RWCoProcOutput; +import org.apache.bifromq.basekv.store.range.hinter.IKVLoadRecord; +import org.apache.bifromq.basekv.store.range.hinter.IKVRangeSplitHinter; import org.apache.bifromq.basekv.store.stats.IStatsCollector; import org.apache.bifromq.basekv.store.util.VerUtil; import org.apache.bifromq.basekv.store.wal.IKVRangeWAL; @@ -159,6 +173,7 @@ public class KVRangeFSM implements IKVRangeFSM { private final ExecutorService fsmExecutor; private final ExecutorService mgmtExecutor; private final AsyncRunner mgmtTaskRunner; + private final IKVRangeCoProcFactory coProcFactory; private final IKVRangeCoProc coProc; private final KVRangeQueryLinearizer linearizer; private final IKVRangeQueryRunner queryRunner; @@ -175,7 +190,7 @@ public class KVRangeFSM implements IKVRangeFSM { private final AtomicReference lifecycle = new AtomicReference<>(Lifecycle.Init); private final CompositeDisposable disposables = new CompositeDisposable(); private final CompletableFuture closeSignal = new CompletableFuture<>(); - private final 
CompletableFuture quitSignal = new CompletableFuture<>(); + private final CompletableFuture quitSignal = new CompletableFuture<>(); private final CompletableFuture destroyedSignal = new CompletableFuture<>(); private final AtomicLong lastShrinkCheckAt = new AtomicLong(); private final AtomicBoolean shrinkingWAL = new AtomicBoolean(); @@ -183,10 +198,27 @@ public class KVRangeFSM implements IKVRangeFSM { private final List splitHinters; private final StampedLock resetLock = new StampedLock(); private final String[] tags; + private final AtomicReference> quitZombie = new AtomicReference<>(); + private final AtomicBoolean cancelingMerge = new AtomicBoolean(); + private volatile long mergePendingAt = -1; private volatile long zombieAt = -1; private IKVRangeMessenger messenger; private KVRangeRestorer restorer; + /** + * The constructor of KVRange FSM. + * + * @param clusterId the cluster id + * @param hostStoreId the store id that hosts this range + * @param id the range id + * @param coProcFactory the coprocessor factory + * @param kvRange the backing data range + * @param walStore the backing WAL Store + * @param queryExecutor the query executor + * @param bgExecutor the background task executor + * @param opts the options + * @param quitListener the listener to be notified when this range is quitting + */ public KVRangeFSM(String clusterId, String hostStoreId, KVRangeId id, @@ -196,13 +228,16 @@ public KVRangeFSM(String clusterId, Executor queryExecutor, Executor bgExecutor, KVRangeOptions opts, - QuitListener quitListener) { + List hinters, + QuitListener quitListener, + String... 
tags) { this.opts = opts.toBuilder().build(); this.id = id; - this.hostStoreId = hostStoreId; // keep a local copy to decouple it from store's state + this.hostStoreId = hostStoreId; this.kvRange = kvRange; - tags = new String[] {"clusterId", clusterId, "storeId", hostStoreId, "rangeId", KVRangeIdUtil.toString(id)}; + this.tags = tags; this.log = MDCLogger.getLogger(KVRangeFSM.class, tags); + this.metricManager = new KVRangeMetricManager(clusterId, hostStoreId, id); this.wal = new KVRangeWAL(clusterId, hostStoreId, id, walStore, opts.getWalRaftConfig(), opts.getMaxWALFatchBatchSize()); this.fsmExecutor = ExecutorServiceMetrics.monitor(Metrics.globalRegistry, @@ -217,17 +252,18 @@ public KVRangeFSM(String clusterId, "manager", "basekv.range", Tags.of(tags)); this.mgmtTaskRunner = new AsyncRunner("basekv.runner.rangemanager", mgmtExecutor, "rangeId", KVRangeIdUtil.toString(id)); - this.splitHinters = coProcFactory.createHinters(clusterId, hostStoreId, id, this.kvRange::newDataReader); - this.coProc = coProcFactory.createCoProc(clusterId, hostStoreId, id, this.kvRange::newDataReader); + this.splitHinters = hinters; + this.coProcFactory = coProcFactory; + this.coProc = coProcFactory.createCoProc(clusterId, hostStoreId, id, this.kvRange::newReader); this.snapshotBandwidthGovernor = new SnapshotBandwidthGovernor(opts.getSnapshotSyncBytesPerSec()); - long lastAppliedIndex = this.kvRange.lastAppliedIndex(); - this.linearizer = new KVRangeQueryLinearizer(wal::readIndex, queryExecutor, lastAppliedIndex, tags); + long lastAppliedIndex = this.kvRange.lastAppliedIndex().blockingFirst(); + this.linearizer = new KVRangeQueryLinearizer(wal::readIndex, queryExecutor, lastAppliedIndex, + metricManager::recordLinearization, tags); this.queryRunner = new KVRangeQueryRunner(this.kvRange, coProc, queryExecutor, linearizer, splitHinters, this::latestDescriptor, resetLock, tags); this.statsCollector = new KVRangeStatsCollector(this.kvRange, wal, 
Duration.ofSeconds(opts.getStatsCollectIntervalSec()), bgExecutor); - this.metricManager = new KVRangeMetricManager(clusterId, hostStoreId, id); this.walSubscription = wal.subscribe(lastAppliedIndex, new IKVRangeWALSubscriber() { @Override @@ -242,7 +278,7 @@ public CompletableFuture restore(KVRangeSnapshot snapshot, String leader, () -> KVRangeFSM.this.restore(snapshot, leader, callback)); } }, fsmExecutor); - quitSignal.thenAccept(v -> quitListener.onQuit(this)); + quitSignal.thenAccept(reset -> quitListener.onQuit(this, reset)); } @Override @@ -251,12 +287,12 @@ public KVRangeId id() { } public long ver() { - return kvRange.version(); + return kvRange.currentVer(); } @Override public Boundary boundary() { - return kvRange.boundary(); + return kvRange.currentBoundary(); } @Override @@ -267,9 +303,11 @@ public CompletableFuture open(IKVRangeMessenger messenger) { return mgmtTaskRunner.add(() -> { if (lifecycle.compareAndSet(Init, Lifecycle.Opening)) { log.info("Opening range: appliedIndex={}, state={}, ver={}", - kvRange.lastAppliedIndex(), kvRange.state().getType(), print(kvRange.version())); + kvRange.currentLastAppliedIndex(), + kvRange.state().blockingFirst().getType(), + print(kvRange.currentVer())); this.messenger = messenger; - factSubject.onNext(reset(kvRange.boundary())); + factSubject.onNext(reset(kvRange.boundary().blockingFirst())); // start the wal wal.start(); this.restorer = new KVRangeRestorer(wal.latestSnapshot(), kvRange, messenger, @@ -288,41 +326,59 @@ public CompletableFuture open(IKVRangeMessenger messenger) { } })); disposables.add(descriptorSubject.subscribe(metricManager::report)); - disposables.add(Observable.combineLatest( - kvRange.metadata().distinctUntilChanged(), - wal.state().distinctUntilChanged(), - wal.replicationStatus().distinctUntilChanged(), - statsCollector.collect().distinctUntilChanged(), - splitHintsSubject.distinctUntilChanged(), - factSubject.distinctUntilChanged(), - queryReadySubject.distinctUntilChanged() - 
.switchMap(v -> { - if (v) { - return Observable.timer(5, TimeUnit.SECONDS, Schedulers.from(mgmtExecutor)) - .map(t -> true); - } else { - return Observable.just(false); - } - }) - .distinctUntilChanged(), - (meta, - role, - syncStats, - rangeStats, - splitHints, - fact, - readyForQuery) -> { + disposables.add(Observable.combineLatestArray( + new ObservableSource[] {kvRange.ver(), + kvRange.state(), + kvRange.boundary(), + kvRange.clusterConfig(), + wal.state().distinctUntilChanged(), + wal.replicationStatus().distinctUntilChanged(), + statsCollector.collect().distinctUntilChanged(), + splitHintsSubject.distinctUntilChanged(), + factSubject.distinctUntilChanged(), + queryReadySubject.distinctUntilChanged() + .switchMap(v -> { + if (v) { + return Observable.timer(5, TimeUnit.SECONDS, Schedulers.from(mgmtExecutor)) + .map(t -> true); + } else { + return Observable.just(false); + } + }) + .distinctUntilChanged()}, + (latest) -> { + long ver = (long) latest[0]; + State state = (State) latest[1]; + Boundary boundary = (Boundary) latest[2]; + ClusterConfig clusterConfig = (ClusterConfig) latest[3]; + RaftNodeStatus role = (RaftNodeStatus) latest[4]; + @SuppressWarnings("unchecked") + Map syncStats = (Map) latest[5]; + @SuppressWarnings("unchecked") + Map rangeStats = (Map) latest[6]; + @SuppressWarnings("unchecked") + List splitHints = (List) latest[7]; + Any fact = (Any) latest[8]; + boolean readyForQuery = (boolean) latest[9]; log.trace("Split hints: \n{}", splitHints); + List alignedHints = splitHints.stream().map(h -> { + if (h.hasSplitKey()) { + return coProcFactory.toSplitKey(h.getSplitKey(), boundary) + .map(k -> h.toBuilder().setSplitKey(k).build()) + .orElseGet(() -> h.toBuilder().clearSplitKey().build()); + } + return h; + }).toList(); return KVRangeDescriptor.newBuilder() - .setVer(meta.ver()) + .setVer(ver) .setId(id) - .setBoundary(meta.boundary()) + .setBoundary(boundary) .setRole(role) - .setState(meta.state().getType()) - .setConfig(meta.clusterConfig()) 
+ .setState(state.getType()) + .setConfig(clusterConfig) .putAllSyncState(syncStats) .putAllStatistics(rangeStats) - .addAllHints(splitHints) + .addAllHints(alignedHints) .setHlc(HLC.INST.get()) .setFact(fact) .setReadyForQuery(readyForQuery) @@ -336,9 +392,11 @@ public CompletableFuture open(IKVRangeMessenger messenger) { .observeOn(Schedulers.from(mgmtExecutor)) .subscribe(role -> coProc.onLeader(role == RaftNodeStatus.Leader))); lifecycle.set(Open); - metricManager.reportLastAppliedIndex(kvRange.lastAppliedIndex()); + metricManager.reportLastAppliedIndex(kvRange.lastAppliedIndex().blockingFirst()); log.info("Range opened: appliedIndex={}, state={}, ver={}", - kvRange.lastAppliedIndex(), kvRange.state().getType(), print(kvRange.version())); + kvRange.currentLastAppliedIndex(), + kvRange.state().blockingFirst().getType(), + print(kvRange.currentVer())); // make sure latest snapshot exists if (!kvRange.hasCheckpoint(wal.latestSnapshot())) { log.debug("Latest snapshot not available, do compaction: \n{}", wal.latestSnapshot()); @@ -357,8 +415,9 @@ public void tick() { statsCollector.tick(); dumpSessions.values().forEach(KVRangeDumpSession::tick); shrinkWAL(); - checkAndRepairFromZombieState(); + judgeZombieState(); estimateSplitHint(); + checkMergeTimeout(); } @Override @@ -379,26 +438,38 @@ private CompletableFuture doClose() { log.info("Closing range"); descriptorSubject.onComplete(); disposables.dispose(); - walSubscription.stop(); - splitHinters.forEach(IKVRangeSplitHinter::close); - coProc.close(); - CompletableFuture.allOf(dumpSessions.values() + CompletableFuture.completedFuture(null) + .thenComposeAsync(v -> walSubscription.stop(), mgmtExecutor) + .thenAcceptAsync(v -> { + try { + splitHinters.forEach(IKVRangeSplitHinter::close); + } catch (Throwable e) { + log.error("Split hinter close error", e); + } + try { + coProc.close(); + } catch (Throwable e) { + log.error("CoProc close error", e); + } + }, mgmtExecutor) + .thenComposeAsync(v -> 
CompletableFuture.allOf(dumpSessions.values() .stream() .map(dumpSession -> { dumpSession.cancel(); return dumpSession.awaitDone(); }) - .toArray(CompletableFuture[]::new)) - .thenCompose(v -> restorer.awaitDone()) - .thenCompose(v -> statsCollector.stop()) - .thenCompose(v -> mgmtTaskRunner.awaitDone()) - .thenCompose(v -> wal.close()) - .thenCompose(v -> awaitShutdown(fsmExecutor)) + .toArray(CompletableFuture[]::new)), mgmtExecutor) + .thenComposeAsync(v -> restorer.awaitDone(), mgmtExecutor) + .thenComposeAsync(v -> statsCollector.stop(), mgmtExecutor) + .thenComposeAsync(v -> mgmtTaskRunner.awaitDone(), mgmtExecutor) + .thenComposeAsync(v -> wal.close(), mgmtExecutor) + .thenComposeAsync(v -> awaitShutdown(fsmExecutor), mgmtExecutor) .whenComplete((v, e) -> { kvRange.close(); metricManager.close(); cmdFutures.values() - .forEach(f -> f.completeExceptionally(new KVRangeException.TryLater("Range closed"))); + .forEach( + f -> f.completeExceptionally(new KVRangeException.TryLater("Range closed"))); queryRunner.close(); log.info("Range closed"); lifecycle.set(Closed); @@ -418,13 +489,13 @@ public CompletableFuture destroy() { if (lifecycle.get() == Open) { log.info("Destroying range"); doClose() - .thenCompose(v -> { + .thenComposeAsync(v -> { if (lifecycle.compareAndSet(Closed, Destroying)) { kvRange.destroy(); return wal.destroy(); } return CompletableFuture.completedFuture(null); - }) + }, mgmtExecutor) .whenComplete((v, e) -> { if (lifecycle.compareAndSet(Destroying, Destroyed)) { log.info("Range destroyed"); @@ -441,16 +512,22 @@ public CompletableFuture recover() { return wal.recover(); } + @Override + public CompletableFuture quit() { + quitZombie.compareAndSet(null, new CompletableFuture<>()); + return quitZombie.get(); + } + @Override public CompletableFuture transferLeadership(long ver, String newLeader) { return metricManager.recordTransferLeader(() -> { - if (ver != kvRange.version()) { + if (ver != kvRange.currentVer()) { // version not exactly match 
return CompletableFuture.failedFuture( new KVRangeException.BadVersion("Version Mismatch", latestLeaderDescriptor())); } log.info("Transferring leader[ver={}, state={}]: newLeader={}", - print(ver), kvRange.state().getType(), newLeader); + print(ver), kvRange.currentState().getType(), newLeader); return wal.transferLeadership(newLeader) .exceptionally(unwrap(e -> { if (e instanceof LeaderTransferException.NotFoundOrQualifiedException) { @@ -473,7 +550,8 @@ public CompletableFuture changeReplicaConfig(long ver, Set newVote return changeReplicaConfig(nextTaskId(), ver, newVoters, newLearners); } - private CompletableFuture changeReplicaConfig(String taskId, long ver, + private CompletableFuture changeReplicaConfig(String taskId, + long ver, Set newVoters, Set newLearners) { return metricManager.recordConfigChange(() -> submitManagementCommand(KVRangeCommand.newBuilder() @@ -499,12 +577,13 @@ public CompletableFuture split(long ver, ByteString splitKey) { } @Override - public CompletableFuture merge(long ver, KVRangeId mergeeId) { + public CompletableFuture merge(long ver, KVRangeId mergeeId, Set mergeeVoters) { return metricManager.recordMerge(() -> submitManagementCommand(KVRangeCommand.newBuilder() .setTaskId(nextTaskId()) .setVer(ver) .setPrepareMergeWith(PrepareMergeWith.newBuilder() .setMergeeId(mergeeId) + .addAllVoters(mergeeVoters) .buildPartial()) .build())); } @@ -572,11 +651,11 @@ private CompletableFuture submitMutationCommand(KVRangeCommand mutationCo return CompletableFuture.failedFuture( new KVRangeException.InternalException("Range not open:" + KVRangeIdUtil.toString(id))); } - if (!boundaryCompatible(mutationCommand.getVer(), kvRange.version())) { + if (!boundaryCompatible(mutationCommand.getVer(), kvRange.currentVer())) { return CompletableFuture.failedFuture( new KVRangeException.BadVersion("Version Mismatch", latestLeaderDescriptor())); } - State state = kvRange.state(); + State state = kvRange.currentState(); if (state.getType() == NoUse || 
state.getType() == WaitingForMerge || state.getType() == Merged @@ -594,7 +673,7 @@ private CompletableFuture submitManagementCommand(KVRangeCommand manageme return CompletableFuture.failedFuture( new KVRangeException.InternalException("Range not open:" + KVRangeIdUtil.toString(id))); } - if (managementCommand.getVer() != kvRange.version()) { + if (managementCommand.getVer() != kvRange.currentVer()) { return CompletableFuture.failedFuture( new KVRangeException.BadVersion("Version Mismatch", latestLeaderDescriptor())); } @@ -607,7 +686,7 @@ public Observable describe() { } private String nextTaskId() { - return hostStoreId + "-" + taskSeqNo.getAndIncrement(); + return hostStoreId + "-" + id.getId() + "-" + taskSeqNo.getAndIncrement(); } @SuppressWarnings("unchecked") @@ -657,7 +736,7 @@ private void finishCommandWithError(String taskId, Throwable e) { private CompletableFuture apply(LogEntry entry, boolean isLeader) { CompletableFuture onDone = new CompletableFuture<>(); - if (kvRange.lastAppliedIndex() > entry.getIndex()) { + if (kvRange.currentLastAppliedIndex() > entry.getIndex()) { // skip already applied log log.debug("Skip already applied log: index={}, term={}", entry.getIndex(), entry.getTerm()); onDone.complete(null); @@ -666,9 +745,9 @@ private CompletableFuture apply(LogEntry entry, boolean isLeader) { switch (entry.getTypeCase()) { case CONFIG -> { IKVRangeWriter rangeWriter = kvRange.toWriter(); - try { + try (IKVRangeRefreshableReader rangeReader = kvRange.newReader()) { Supplier> afterLogApplied = applyConfigChange(entry.getTerm(), - entry.getIndex(), entry.getConfig(), rangeWriter); + entry.getIndex(), entry.getConfig(), rangeReader, rangeWriter); rangeWriter.lastAppliedIndex(entry.getIndex()); rangeWriter.done(); afterLogApplied.get() @@ -689,48 +768,53 @@ private CompletableFuture apply(LogEntry entry, boolean isLeader) { } } case DATA -> { + IKVLoadRecorder loadRecorder = new KVLoadRecorder(); + IKVRangeWriter rangeWriter = 
kvRange.toWriter(loadRecorder); + IKVRangeRefreshableReader rangeReader = new LoadRecordableKVReader(kvRange.newReader(), loadRecorder); try { KVRangeCommand command = ZeroCopyParser.parse(entry.getData(), KVRangeCommand.parser()); - IKVLoadRecorder loadRecorder = new KVLoadRecorder(); - IKVRangeWriter rangeWriter = kvRange.toWriter(loadRecorder); - IKVReader borrowedReader = kvRange.borrowDataReader(); - IKVReader recordableReader = new LoadRecordableKVReader(borrowedReader, loadRecorder); - long version = kvRange.version(); - State state = kvRange.state(); - Boundary boundary = kvRange.boundary(); - ClusterConfig clusterConfig = kvRange.clusterConfig(); - applyCommand(isLeader, version, state, boundary, clusterConfig, - entry.getTerm(), entry.getIndex(), command, recordableReader, rangeWriter) - .whenComplete((callback, e) -> { - if (onDone.isCancelled()) { - rangeWriter.abort(); - } else { - try { - if (e != null) { - rangeWriter.abort(); - onDone.completeExceptionally(e); - } else { - rangeWriter.lastAppliedIndex(entry.getIndex()); - rangeWriter.done(); - if (command.hasRwCoProc()) { - IKVLoadRecord loadRecord = loadRecorder.stop(); - splitHinters.forEach( - hint -> hint.recordMutate(command.getRwCoProc(), loadRecord)); + CompletableFuture applyFuture = applyCommand(isLeader, entry.getTerm(), entry.getIndex(), + command, rangeReader, rangeWriter) + .whenCompleteAsync((callback, e) -> { + try { + if (onDone.isCancelled()) { + rangeWriter.abort(); + } else { + try { + if (e != null) { + log.debug("Failed to apply log: {}", log, e); + rangeWriter.abort(); + onDone.completeExceptionally(e); + } else { + rangeWriter.lastAppliedIndex(entry.getIndex()); + rangeWriter.done(); + if (command.hasRwCoProc()) { + IKVLoadRecord loadRecord = loadRecorder.stop(); + splitHinters.forEach( + hint -> hint.recordMutate(command.getRwCoProc(), loadRecord)); + } + callback.run(); + linearizer.afterLogApplied(entry.getIndex()); + metricManager.reportLastAppliedIndex(entry.getIndex()); 
+ onDone.complete(null); } - callback.run(); - linearizer.afterLogApplied(entry.getIndex()); - metricManager.reportLastAppliedIndex(entry.getIndex()); - onDone.complete(null); + } catch (Throwable t) { + log.error("Failed to apply log", t); + onDone.completeExceptionally(t); } - } catch (Throwable t) { - log.error("Failed to apply log", t); - onDone.completeExceptionally(t); } + } finally { + rangeReader.close(); } - kvRange.returnDataReader(borrowedReader); - }); + }, fsmExecutor); + onDone.whenCompleteAsync((v, e) -> { + if (onDone.isCancelled()) { + applyFuture.cancel(true); + } + }); } catch (Throwable t) { - log.error("Failed to apply command", t); + rangeReader.close(); + log.error("Failed to apply log: {}", log, t); onDone.completeExceptionally(t); } } @@ -744,10 +828,12 @@ private CompletableFuture apply(LogEntry entry, boolean isLeader) { private Supplier> applyConfigChange(long term, long index, ClusterConfig config, + IKVRangeReader rangeReader, IKVRangeWritable rangeWriter) { - State state = rangeWriter.state(); - log.info("Apply new config[term={}, index={}]: state={}, leader={}\n{}", - term, index, state, wal.isLeader(), config); + long ver = rangeReader.version(); + State state = rangeReader.state(); + log.info("Apply new config[term={}, index={}]: state={}, ver={}, leader={}\n{}", + term, index, state, print(ver), wal.isLeader(), config); rangeWriter.clusterConfig(config); if (config.getNextVotersCount() != 0 || config.getNextLearnersCount() != 0) { // skip joint-config @@ -760,7 +846,7 @@ private Supplier> applyConfigChange(long term, case ConfigChanging -> { // reset back to normal String taskId = state.getTaskId(); - rangeWriter.bumpVer(false); + rangeWriter.ver(bump(ver, false)); if (taskId.equals(config.getCorrelateId())) { // request config change success, requested config applied boolean remove = !members.contains(hostStoreId); @@ -771,7 +857,7 @@ private Supplier> applyConfigChange(long term, .setTaskId(taskId) .build()); return () -> { - 
quitSignal.complete(null); + quitSignal.complete(false); finishCommand(taskId); return CompletableFuture.completedFuture(null); }; @@ -780,28 +866,19 @@ private Supplier> applyConfigChange(long term, .setType(Normal) .setTaskId(taskId) .build()); - return () -> compactWAL().thenRun(() -> finishCommand(taskId)); + return () -> compactWAL().thenRunAsync(() -> finishCommand(taskId), fsmExecutor); } } else { - // request config change failed, the config entry is appended due to leader reelection - boolean remove = !members.contains(hostStoreId); - if (remove) { - rangeWriter.state(State.newBuilder() - .setType(Removed) - .setTaskId(taskId) - .build()); - return () -> { - quitSignal.complete(null); - finishCommand(taskId); - return CompletableFuture.completedFuture(null); - }; - } else { - rangeWriter.state(State.newBuilder() - .setType(Normal) - .setTaskId(taskId) - .build()); - return () -> compactWAL().thenRun(() -> finishCommand(taskId)); - } + rangeWriter.state(State.newBuilder() + .setType(Normal) + .setTaskId(taskId) + .build()); + // config entry append during leader change + return () -> { + finishCommandWithError(taskId, + new KVRangeException.TryLater("ConfigChange aborted by leader changes")); + return CompletableFuture.completedFuture(null); + }; } } case MergedQuiting -> { @@ -818,11 +895,11 @@ private Supplier> applyConfigChange(long term, .setTaskId(taskId) .build()); } - rangeWriter.bumpVer(false); + rangeWriter.ver(bump(ver, false)); return () -> { finishCommand(taskId); if (remove) { - quitSignal.complete(null); + quitSignal.complete(false); } return CompletableFuture.completedFuture(null); }; @@ -837,7 +914,7 @@ private Supplier> applyConfigChange(long term, .build()); return () -> { finishCommand(taskId); - quitSignal.complete(null); + quitSignal.complete(false); return CompletableFuture.completedFuture(null); }; } else { @@ -845,11 +922,11 @@ private Supplier> applyConfigChange(long term, .setType(Normal) .setTaskId(taskId) .build()); - return () 
-> compactWAL().thenRun(() -> { + return () -> compactWAL().thenRunAsync(() -> { // purge failed due to leader change, reset back to normal log.debug("Purge failed due to leader change[newConfig={}]", config); finishCommand(taskId); - }); + }, fsmExecutor); } } default -> { @@ -860,18 +937,18 @@ private Supplier> applyConfigChange(long term, } private CompletableFuture applyCommand(boolean isLeader, - long ver, - State state, - Boundary boundary, - ClusterConfig clusterConfig, long logTerm, long logIndex, KVRangeCommand command, - IKVReader dataReader, + IKVRangeRefreshableReader rangeReader, IKVRangeWritable rangeWriter) { CompletableFuture onDone = new CompletableFuture<>(); long reqVer = command.getVer(); String taskId = command.getTaskId(); + long ver = rangeReader.version(); + State state = rangeReader.state(); + Boundary boundary = rangeReader.boundary(); + ClusterConfig clusterConfig = rangeReader.clusterConfig(); if (log.isTraceEnabled()) { log.trace("Execute KVRange Command[term={}, index={}, taskId={}]: ver={}, state={}, \n{}", logTerm, logIndex, taskId, print(ver), state, command); @@ -1026,7 +1103,7 @@ logTerm, logIndex, taskId, print(ver), state, newConfig.getVotersList(), .build()); } } - rangeWriter.bumpVer(false); + rangeWriter.ver(bump(ver, false)); onDone.complete(NOOP); } }, fsmExecutor); @@ -1066,33 +1143,26 @@ logTerm, logIndex, taskId, print(ver), state, .setTaskId(taskId) .build()) .build(); - rangeWriter.boundary(leftBoundary).bumpVer(true); + rangeWriter.boundary(leftBoundary).ver(bump(ver, true)); // migrate data to right-hand keyspace which created implicitly - rangeWriter.migrateTo(request.getNewId(), rightBoundary) - .resetVer(rhsSS.getVer()) - .boundary(rhsSS.getBoundary()) - .lastAppliedIndex(rhsSS.getLastAppliedIndex()) - .state(rhsSS.getState()) - .clusterConfig(rhsSS.getClusterConfig()) - .done(); - onDone.complete(() -> { - try { - log.debug("Range split completed[taskId={}]", taskId); - // reset hinter when boundary changed - 
splitHinters.forEach(hinter -> hinter.reset(leftBoundary)); - factSubject.onNext(reset(leftBoundary)); - } catch (Throwable t) { - log.error("Failed to reset hinter or coProc", t); - finishCommandWithError(taskId, t); - } finally { - finishCommand(taskId); + rangeWriter.migrateTo(request.getNewId(), rhsSS); + onDone.complete(() -> compactWAL().whenCompleteAsync((v, e) -> { + if (e != null) { + log.error("WAL compact failed after split", e); + quitSignal.complete(true); + return; } + log.debug("Range split completed[taskId={}]", taskId); + resetHinterAndCoProc(leftBoundary); + finishCommand(taskId); messenger.once(KVRangeMessage::hasEnsureRangeReply) .orTimeout(300, TimeUnit.SECONDS) - .whenCompleteAsync((v, e) -> { - if (e != null || v.getEnsureRangeReply().getResult() != EnsureRangeReply.Result.OK) { + .whenCompleteAsync((rangeMsg, t) -> { + if (t != null + || + rangeMsg.getEnsureRangeReply().getResult() != EnsureRangeReply.Result.OK) { log.error("Failed to load rhs range[taskId={}]: newRangeId={}", - taskId, request.getNewId(), e); + taskId, request.getNewId(), t); } }, fsmExecutor); // ensure the new range is loaded in the store @@ -1111,7 +1181,7 @@ logTerm, logIndex, taskId, print(ver), state, .setData(rhsSS.toByteString()) .build()) .build()).build()); - }); + }, fsmExecutor)); } else { onDone.complete(() -> finishCommandWithError(taskId, new KVRangeException.BadRequest("Invalid split key"))); @@ -1129,315 +1199,313 @@ logTerm, logIndex, taskId, print(ver), state, "Merge abort, range is in state:" + state.getType().name()))); break; } - log.info( - "Merging[term={}, index={}, taskId={}, ver={}, state={}]: mergerId={}, mergeeId={}", - logTerm, logIndex, taskId, print(ver), state, - KVRangeIdUtil.toString(id), KVRangeIdUtil.toString(command.getPrepareMergeWith().getMergeeId())); + if (!clusterConfig.getNextVotersList().isEmpty() || !clusterConfig.getNextLearnersList().isEmpty()) { + onDone.complete(() -> finishCommandWithError(taskId, + new 
KVRangeException.TryLater("Merge abort, range is in config changing"))); + break; + } - CompletableFuture onceFuture = messenger.once(m -> m.hasPrepareMergeToReply() - && m.getPrepareMergeToReply().getTaskId().equals(taskId) - && m.getPrepareMergeToReply().getAccept()); + PrepareMergeWith request = command.getPrepareMergeWith(); + if (request.getVotersList().isEmpty()) { + onDone.complete(() -> finishCommandWithError(taskId, + new KVRangeException.BadRequest("Merge abort, empty mergee voter set"))); + break; + } + boolean isVoter = clusterConfig.getVotersList().contains(hostStoreId); + if (!isVoter) { + rangeWriter + .ver(bump(ver, false)) + .state(State.newBuilder() + .setType(PreparedMerging) + .setTaskId(taskId) + .build()); + onDone.complete(NOOP); + break; + } + CompletableFuture requestFuture = trySendPrepareMergeToRequest(taskId, request.getMergeeId(), ver, + boundary, request.getVotersList(), + clusterConfig) + .thenAcceptAsync(v -> { + rangeWriter + .ver(bump(ver, false)) + .state(State.newBuilder() + .setType(PreparedMerging) + .setTaskId(taskId) + .build()); + onDone.complete(NOOP); + }, fsmExecutor); onDone.whenCompleteAsync((v, e) -> { if (onDone.isCancelled()) { - onceFuture.cancel(true); + requestFuture.cancel(true); } - }, fsmExecutor); - onceFuture.orTimeout(5, TimeUnit.SECONDS) - .whenCompleteAsync((v, e) -> { - if (e != null) { - onceFuture.cancel(true); - onDone.completeExceptionally(e); - } else { - Map waitingList = clusterConfig.getVotersList().stream() - .collect(Collectors.toMap(voter -> voter, voter -> false)); - rangeWriter.state(State.newBuilder() - .setType(PreparedMerging) + }); + } + case CANCELMERGING -> { + switch (state.getType()) { + case PreparedMerging -> { + // merger cancel workflow + if (reqVer != ver) { + onDone.complete(NOOP); + break; + } + log.info("Merger canceled[term={}, index={}, taskId={}, ver={}, state={}]", + logTerm, logIndex, taskId, print(ver), state); + rangeWriter + .ver(bump(ver, false)) + 
.state(State.newBuilder() + .setType(Normal) .setTaskId(taskId) - .putAllWaitingList(waitingList) .build()); - rangeWriter.bumpVer(false); + onDone.complete(() -> compactWAL() + .whenCompleteAsync((v, e) -> { + if (e != null) { + log.error("WAL compact failed after merger cancel", e); + } + finishCommandWithError(taskId, new KVRangeException.TryLater("Merger canceled")); + }, fsmExecutor)); + } + case WaitingForMerge -> { + // mergee cancel workflow + if (reqVer != ver) { onDone.complete(NOOP); + break; } - }, fsmExecutor); - - PrepareMergeWith request = command.getPrepareMergeWith(); - // broadcast - messenger.send(KVRangeMessage.newBuilder() - .setRangeId(request.getMergeeId()) - .setPrepareMergeToRequest(PrepareMergeToRequest.newBuilder() - .setTaskId(taskId) - .setId(id) - .setVer(VerUtil.bump(ver, false)) - .setBoundary(boundary) - .setConfig(clusterConfig) - .build()) - .build()); - } - case CANCELMERGING -> { - if (reqVer != ver) { - onDone.complete(NOOP); - break; - } - log.info( - "Merge canceled[term={}, index={}, taskId={}, ver={}, state={}]: mergerId={}, mergeeId={}", - logTerm, logIndex, taskId, print(ver), state, - KVRangeIdUtil.toString(id), KVRangeIdUtil.toString(command.getPrepareMergeWith().getMergeeId())); - if (state.getType() == PreparedMerging && state.hasTaskId() && taskId.equals(state.getTaskId())) { - rangeWriter.state(State.newBuilder() - .setType(Normal) - .setTaskId(taskId) - .build()); - rangeWriter.bumpVer(false); - onDone.complete( - () -> finishCommandWithError(taskId, new KVRangeException.TryLater("Merge canceled"))); - } else { - onDone.complete(NOOP); + log.info("Mergee canceled[term={}, index={}, taskId={}, ver={}, state={}]", + logTerm, logIndex, taskId, print(ver), state); + rangeWriter.state(State.newBuilder() + .setType(Normal) + .setTaskId(taskId) + .build()); + rangeWriter.ver(bump(ver, false)); + onDone.complete(() -> compactWAL() + .whenComplete((v, e) -> { + if (e != null) { + log.error("WAL compact failed after mergee 
cancel", e); + } + })); + } + default -> onDone.complete(NOOP); } } case PREPAREMERGETO -> { PrepareMergeTo request = command.getPrepareMergeTo(); - if (reqVer != ver) { + // skip PrepareMergeTo command either redundant from same merger or another merger + // which will not receive any response and cancel itself eventually + if (state.getType() == WaitingForMerge) { onDone.complete(NOOP); break; } - // here is the formal mergeable condition check + // we don't compare request ver and current ver here + // the formal mergeable condition check + boolean isVoter = clusterConfig.getVotersList().contains(hostStoreId); + ClusterConfig mergerConfig = request.getConfig(); + List mergerVoters = mergerConfig.getVotersList(); if (state.getType() != Normal - || !isCompatible(request.getConfig(), clusterConfig) + || !clusterConfig.getNextVotersList().isEmpty() // config changing + || !clusterConfig.getNextLearnersList().isEmpty() // config changing || !canCombine(request.getBoundary(), boundary)) { - if (!taskId.equals(state.getTaskId())) { - log.debug("Cancel the loser merger[{}]", - KVRangeIdUtil.toString(request.getMergerId())); - // help the loser merger cancel its operation by broadcast CancelMerging - // via store message and wait for at lease one response to make sure - // the loser merger has got canceled - log.debug("Loser merger[{}] not found in local store", - KVRangeIdUtil.toString(request.getMergerId())); - CompletableFuture onceFuture = messenger.once(m -> - m.hasCancelMergingReply() - && m.getCancelMergingReply().getTaskId().equals(taskId) - && m.getCancelMergingReply().getAccept()); - // cancel the future - onDone.whenCompleteAsync((v, e) -> { - if (onceFuture.isCancelled()) { - onceFuture.cancel(true); - } - }, fsmExecutor); - onceFuture.orTimeout(5, TimeUnit.SECONDS) - .whenCompleteAsync((v, e) -> { - if (e != null) { - onDone.completeExceptionally(e); - } else { - onDone.complete(NOOP); - } - }, fsmExecutor); - // broadcast - 
messenger.send(KVRangeMessage.newBuilder() - .setRangeId(request.getMergerId()) - .setCancelMergingRequest(CancelMergingRequest.newBuilder() - .setTaskId(taskId) - .setVer(request.getMergerVer()) - .setRequester(id) - .build()) - .build()); - } else { - // ignore duplicated requests from same merger + if (!isVoter) { + rangeWriter.ver(bump(ver, false)); onDone.complete(NOOP); + break; } + CompletableFuture requestFuture = trySendCancelMergingRequest(taskId, request.getMergerId(), + request.getMergerVer(), mergerVoters, + clusterConfig.getVotersList()) + .whenCompleteAsync((reply, e) -> { + rangeWriter.ver(bump(ver, false)); + onDone.complete(NOOP); + }, fsmExecutor); + onDone.whenCompleteAsync((v, e) -> { + if (onDone.isCancelled()) { + requestFuture.cancel(true); + } + }); break; } - // tell merger replica to start merge by broadcast Merge command which may end up - // multiple Merge commands targeting to same merger replica - CompletableFuture onceFuture = messenger.once(m -> - m.hasMergeReply() - && m.getMergeReply().getTaskId().equals(taskId) - && m.getMergeReply().getAccept()); - // cancel the future - onDone.whenCompleteAsync((v, e) -> { - if (onDone.isCancelled()) { - onceFuture.cancel(true); - } - }, fsmExecutor); - onceFuture.orTimeout(5, TimeUnit.SECONDS) - .whenCompleteAsync((v, e) -> { - if (e != null) { - onceFuture.cancel(true); - onDone.completeExceptionally(e); - } else { - rangeWriter.state(State.newBuilder() + // merge condition met + if (!isVoter) { + rangeWriter + .ver(bump(ver, false)) + .state(State.newBuilder() + .setType(WaitingForMerge) + .setTaskId(taskId) + .build()); + onDone.complete(NOOP); + break; + } + CompletableFuture requestFuture = trySendMergeRequest(taskId, request.getMergerId(), + request.getMergerVer(), ver, boundary, mergerVoters, clusterConfig) + .thenAcceptAsync(v -> { + rangeWriter + .ver(bump(ver, false)) + .state(State.newBuilder() .setType(WaitingForMerge) .setTaskId(taskId) .build()); - rangeWriter.bumpVer(false); - 
onDone.complete(NOOP); - } + onDone.complete(NOOP); }, fsmExecutor); - // broadcast - messenger.send(KVRangeMessage.newBuilder() - .setRangeId(request.getMergerId()) - .setMergeRequest(MergeRequest.newBuilder() - .setTaskId(taskId) - .setVer(request.getMergerVer()) - .setMergeeId(id) - .setMergeeVer(VerUtil.bump(ver, false)) - .setBoundary(boundary) - .setStoreId(hostStoreId) - .build()) - .build()); + onDone.whenCompleteAsync((v, e) -> { + if (onDone.isCancelled()) { + requestFuture.cancel(true); + } + }); } case MERGE -> { Merge request = command.getMerge(); - if (reqVer != ver) { - onDone.complete(NOOP); - break; - } - assert state.getType() == PreparedMerging; - Map waitingList = Maps.newHashMap(state.getWaitingListMap()); - // waiting list only track voter's progress - if (waitingList.containsKey(request.getStoreId())) { - waitingList.put(request.getStoreId(), true); - } - int quorumSize = waitingList.size() / 2 + 1; - int readyNum = Maps.filterValues(waitingList, v -> v).size(); - if (readyNum < quorumSize) { - // not ready for merge - // update waiting list only - rangeWriter.state(state.toBuilder() - .clearWaitingList() - .putAllWaitingList(waitingList) - .build()); - onDone.complete(NOOP); - break; - } - // quorum meets - CompletableFuture readyToMerge; - if (waitingList.containsKey(hostStoreId) && waitingList.get(hostStoreId)) { - // if local merger is a voter and is ready to merge - readyToMerge = CompletableFuture.completedFuture(null); - } else { - // waiting for Merge command from local mergee committed in merger's WAL - readyToMerge = wal.once(logIndex, - l -> { - if (l.hasData()) { - try { - KVRangeCommand nextCmd = ZeroCopyParser.parse(l.getData(), KVRangeCommand.parser()); - if (nextCmd.hasMerge() - && nextCmd.getMerge().getStoreId().equals(hostStoreId)) { - return true; - } - } catch (InvalidProtocolBufferException ex) { - throw new KVRangeException.InternalException("Unable to parse logEntry", - ex); + switch (state.getType()) { + case 
PreparedMerging -> { + if (reqVer != ver) { + onDone.complete(NOOP); + break; + } + KVRangeId mergeeId = request.getMergeeId(); + ClusterConfig mergeeConfig = request.getConfig(); + boolean isVoter = clusterConfig.getVotersList().contains(hostStoreId); + List mergeeReplicas = new ArrayList<>(mergeeConfig.getVotersList()); + mergeeReplicas.addAll(mergeeConfig.getLearnersList()); + IKVRangeWritable.Migrater migrater = rangeWriter.startMerging(((count, bytes) -> { + mergePendingAt = -1; + log.info("Merging data from mergee: taskId={}, received entries={}, bytes={}", + taskId, count, bytes); + })); + CompletableFuture migrateFuture = tryMigrate(mergeeId, mergeeReplicas, + clusterConfig.getVotersList(), migrater) + .thenComposeAsync(result -> { + switch (result) { + case SUCCESS_AFTER_RESET -> { + return CompletableFuture.completedFuture(() -> { + // quit and restore from leader + log.debug("Restore from leader: mergeeId={}", + KVRangeIdUtil.toString(mergeeId)); + quitSignal.complete(true); + }); + } + case SUCCESS -> { + return tryConfirmMerged(taskId, ver) + .thenComposeAsync(mergeResult -> { + if (mergeResult == TryConfirmMergedResult.ALREADY_CANCELED) { + // rollback + migrater.abort(); + return CompletableFuture.completedFuture( + TryConfirmMergedResult.ALREADY_CANCELED); + } + if (isVoter) { + return trySendMergeDoneRequest(taskId, mergeeId, + request.getMergeeVer(), mergeeConfig.getVotersList(), + clusterConfig.getVotersList()) + .thenApply(v -> TryConfirmMergedResult.MERGED); + } + return CompletableFuture.completedFuture(TryConfirmMergedResult.MERGED); + }, fsmExecutor) + .thenApplyAsync(mergeResult -> { + if (mergeResult == TryConfirmMergedResult.ALREADY_CANCELED) { + migrater.abort(); + // leave state & ver unchanged + return NOOP; + } else { + long newVer = Math.max(ver, request.getMergeeVer()); + Boundary mergedBoundary = combine(boundary, request.getBoundary()); + migrater + .ver(VerUtil.bump(newVer, true)) + .boundary(mergedBoundary) + 
.state(State.newBuilder() + .setType(Normal) + .setTaskId(taskId) + .build()); + return () -> { + log.info("Merger done[term={}, index={}, taskId={}, " + + "ver={}, state={}]: mergeeId={}, boundary={}", + logTerm, logIndex, taskId, print(ver), state, + KVRangeIdUtil.toString(request.getMergeeId()), + mergedBoundary); + compactWAL() + .whenCompleteAsync((v, t) -> { + if (t != null) { + log.error("WAL compact failed after merge", t); + } + resetHinterAndCoProc(mergedBoundary); + finishCommand(taskId); + }, fsmExecutor); + }; + } + }, fsmExecutor); + } + default -> { + // retry failed + return tryCancelMerging(taskId, ver) + .thenApplyAsync(cancelMergingResult -> { + if (cancelMergingResult == TryCancelMergingResult.CANCELLED) { + return NOOP; + } + // merged + return (Runnable) () -> { + log.info("Merge confirmed, restore from leader: mergeeId={}", + KVRangeIdUtil.toString(mergeeId)); + quitSignal.complete(true); + }; + }, fsmExecutor); } } - return false; }, fsmExecutor) - .thenAccept(v -> { + .thenAccept(onDone::complete); + onDone.whenCompleteAsync((v, e) -> { + if (onDone.isCancelled()) { + migrater.abort(); + migrateFuture.cancel(true); + } }); - // cancel the future - onDone.whenCompleteAsync((v, e) -> { - if (onDone.isCancelled()) { - readyToMerge.cancel(true); - } - }, fsmExecutor); - } - readyToMerge.whenCompleteAsync((v1, e1) -> { - if (e1 != null) { - onDone.completeExceptionally( - new KVRangeException.TryLater("Merge condition not met")); - return; } - // migrate data from mergee, and commit the migration after getting merge done reply - long newVer = Math.max(ver, request.getMergeeVer()); - // make sure the version is odd - Boundary mergedBoundary = combine(boundary, request.getBoundary()); - IKVRangeMetadataWriter rightRangeWriter = rangeWriter.resetVer(VerUtil.bump(newVer, true)) - .boundary(mergedBoundary) - .state(State.newBuilder() - .setType(Normal) - .setTaskId(taskId) - .build()) - .migrateFrom(request.getMergeeId(), request.getBoundary()); - 
rightRangeWriter.done(); - CompletableFuture onceFuture = messenger.once(m -> - m.hasMergeDoneReply() && m.getMergeDoneReply().getTaskId().equals(taskId)); - // cancel the future - onDone.whenCompleteAsync((v, e) -> { - if (onDone.isCancelled()) { - onceFuture.cancel(true); - } - }, fsmExecutor); - onceFuture.orTimeout(5, TimeUnit.SECONDS) - .whenCompleteAsync((v, e) -> { - if (e != null || !v.getMergeDoneReply().getAccept()) { - log.debug("Failed to send MergeDone request", e); - onceFuture.cancel(true); - onDone.completeExceptionally(e != null - ? e : new KVRangeException.InternalException("Failed to send MergeDone request")); - } else { - log.info( - "Merger done[term={}, index={}, taskId={}, ver={}, state={}]: mergerId={}, boundary={}", - logTerm, logIndex, taskId, print(ver), state, KVRangeIdUtil.toString(id), - mergedBoundary); - onDone.complete(() -> { - try { - // reset hinter when boundary changed - splitHinters.forEach(hinter -> hinter.reset(mergedBoundary)); - factSubject.onNext(reset(mergedBoundary)); - } catch (Throwable t) { - log.error("Failed to reset hinter and coProc", t); - } finally { - finishCommand(taskId); - } - }); + case Normal -> { + if (VerUtil.boundaryCompatible(reqVer, ver)) { + // Cancel happens before Merge command is applied + if (!clusterConfig.getVotersList().contains(hostStoreId)) { + onDone.complete(NOOP); + break; } - }, fsmExecutor); - // send merge done request to local mergee - log.debug("Send MergeDone request to Mergee[{}]", KVRangeIdUtil.toString(request.getMergeeId())); - messenger.send(KVRangeMessage.newBuilder() - .setRangeId(request.getMergeeId()) - .setHostStoreId(hostStoreId) - .setMergeDoneRequest(MergeDoneRequest.newBuilder() - .setId(id) - .setTaskId(taskId) - .setMergeeVer(request.getMergeeVer()) - .setStoreId(hostStoreId) - .build()) - .build()); - }, fsmExecutor); + CompletableFuture requestFuture = trySendCancelMergingRequest(taskId, + request.getMergeeId(), request.getMergeeVer(), + 
request.getConfig().getVotersList(), clusterConfig.getVotersList()) + .whenCompleteAsync((reply, e) -> { + onDone.complete(NOOP); + }, fsmExecutor); + onDone.whenCompleteAsync((v, e) -> { + if (onDone.isCancelled()) { + requestFuture.cancel(true); + } + }); + } else { + // merge has been done, ignore late Merge + log.debug("Late Merge command ignored: mergeeId={}", + KVRangeIdUtil.toString(request.getMergeeId())); + onDone.complete(NOOP); + } + } + default -> onDone.complete(NOOP); // ignore the Merge command in other states + } } case MERGEDONE -> { - MergeDone request = command.getMergeDone(); - if (reqVer != ver) { + if (state.getType() != WaitingForMerge || reqVer != ver) { onDone.complete(NOOP); break; } - log.info( - "Mergee done[term={}, index={}, taskId={}, ver={}, state={}]: mergeeId={}", + log.info("Mergee done[term={}, index={}, taskId={}, ver={}, state={}]: mergeeId={}", logTerm, logIndex, taskId, print(ver), state, KVRangeIdUtil.toString(id)); - if (request.getStoreId().equals(hostStoreId)) { - assert state.getType() == WaitingForMerge - && state.hasTaskId() - && taskId.equals(state.getTaskId()); - rangeWriter.boundary(NULL_BOUNDARY) - .bumpVer(true) - .state(State.newBuilder() - .setType(Merged) - .setTaskId(taskId) - .build()); - onDone.complete(() -> { - // reset hinter when boundary changed - splitHinters.forEach(hinter -> hinter.reset(NULL_BOUNDARY)); - factSubject.onNext(reset(NULL_BOUNDARY)); - }); - } else { - Map waitingList = Maps.newHashMap(state.getWaitingListMap()); - waitingList.put(request.getStoreId(), true); - rangeWriter.state(state.toBuilder() - .clearWaitingList() - .putAllWaitingList(waitingList) + rangeWriter.boundary(NULL_BOUNDARY) + .ver(bump(ver, true)) + .state(State.newBuilder() + .setType(Merged) + .setTaskId(taskId) .build()); - onDone.complete(NOOP); - } + onDone.complete(() -> compactWAL() + .whenCompleteAsync((v, e) -> { + if (e != null) { + log.error("WAL compact failed after merge", e); + quitSignal.complete(true); + 
return; + } + resetHinterAndCoProc(NULL_BOUNDARY); + }, fsmExecutor)); } case PUT, DELETE, RWCOPROC -> { if (!boundaryCompatible(reqVer, ver)) { @@ -1461,7 +1529,8 @@ logTerm, logIndex, taskId, print(ver), state, KVRangeIdUtil.toString(id), // normal commands case DELETE -> { Delete delete = command.getDelete(); - Optional value = dataReader.get(delete.getKey()); + Preconditions.checkArgument(BoundaryUtil.inRange(delete.getKey(), boundary)); + Optional value = rangeReader.get(delete.getKey()); if (value.isPresent()) { rangeWriter.kvWriter().delete(delete.getKey()); } @@ -1469,13 +1538,14 @@ logTerm, logIndex, taskId, print(ver), state, KVRangeIdUtil.toString(id), } case PUT -> { Put put = command.getPut(); - Optional value = dataReader.get(put.getKey()); + Preconditions.checkArgument(BoundaryUtil.inRange(put.getKey(), boundary)); + Optional value = rangeReader.get(put.getKey()); rangeWriter.kvWriter().put(put.getKey(), put.getValue()); onDone.complete(() -> finishCommand(taskId, value.orElse(ByteString.EMPTY))); } case RWCOPROC -> { Supplier resultSupplier = - coProc.mutate(command.getRwCoProc(), dataReader, rangeWriter.kvWriter(), isLeader); + coProc.mutate(command.getRwCoProc(), rangeReader, rangeWriter.kvWriter(), isLeader); onDone.complete(() -> { IKVRangeCoProc.MutationResult result = resultSupplier.get(); result.fact().ifPresent(factSubject::onNext); @@ -1500,6 +1570,341 @@ logTerm, logIndex, taskId, print(ver), state, KVRangeIdUtil.toString(id), return onDone; } + private String randomPickOne(List remoteVoters, List localVoters) { + if (remoteVoters.contains(hostStoreId)) { + return hostStoreId; + } + Set shared = Sets.intersection(Sets.newHashSet(remoteVoters), Sets.newHashSet(localVoters)); + if (shared.isEmpty()) { + return remoteVoters.get(ThreadLocalRandom.current().nextInt(remoteVoters.size())); + } + return shared.iterator().next(); + } + + private CompletableFuture tryMigrate(KVRangeId mergeeId, + List mergeeReplicas, + List mergerVoters, + 
IKVRangeWritable.Migrater migrater) { + String sessionId = UUID.randomUUID().toString(); + String mergeeVoter = randomPickOne(mergeeReplicas, mergerVoters); + IKVRangeSnapshotReceiver receiver = new KVRangeSnapshotReceiver(sessionId, mergeeId, + mergeeVoter, messenger, metricManager, fsmExecutor, Math.max(1, opts.getMergeTimeoutSec() / 4), log); + log.debug("Start migrating data from mergee: mergeeId={}, store={}, session={}", + KVRangeIdUtil.toString(mergeeId), mergeeVoter, sessionId); + CompletableFuture migrateTask = receiver.start(migrater::put) + .thenCompose(result -> { + switch (result.code()) { + case TIME_OUT, NOT_FOUND, ERROR -> { + // restore failed, abort and check if merge has done + migrater.abort(); + log.debug("Migration failed: mergeeId={}, store={}, result={}", + KVRangeIdUtil.toString(mergeeId), mergeeVoter, result); + return wal.retrieveCommitted(kvRange.currentLastAppliedIndex() + 1, Long.MAX_VALUE) + .handle((logEntryItr, e) -> { + if (e != null) { + log.error("Failed to retrieve log from wal from index[{}]", + kvRange.currentLastAppliedIndex() + 1, e); + return TryMigrateResult.FAILED; + } + boolean migrationDone = false; + while (logEntryItr.hasNext()) { + LogEntry logEntry = logEntryItr.next(); + if (logEntry.hasData()) { + try { + KVRangeCommand command = ZeroCopyParser.parse(logEntry.getData(), + KVRangeCommand.parser()); + if (command.hasMergeDone()) { + log.debug("Merge has done by {}", + command.getMergeDone().getStoreId()); + migrationDone = true; + break; + } + } catch (Throwable t) { + // should not happen + log.error("Failed to parse logEntry", t); + break; + } + } + } + logEntryItr.close(); + return migrationDone + ? 
TryMigrateResult.SUCCESS_AFTER_RESET : TryMigrateResult.FAILED; + }); + } + default -> { + log.debug("Migration completed: mergeeId={}, store={}", + KVRangeIdUtil.toString(mergeeId), mergeeVoter); + return CompletableFuture.completedFuture(TryMigrateResult.SUCCESS); + } + } + }); + messenger.send(KVRangeMessage.newBuilder() + .setRangeId(mergeeId) + .setHostStoreId(mergeeVoter) + .setDataMergeRequest(DataMergeRequest.newBuilder() + .setSessionId(sessionId) + .setMergerId(id) + .build()) + .build()); + return migrateTask; + } + + private CompletableFuture tryCancelMerging(String taskId, long ver) { + Supplier> proposeCancelTask = () -> wal.propose(KVRangeCommand.newBuilder() + .setTaskId(taskId) + .setVer(ver) + .setCancelMerging(CancelMerging.newBuilder().build()) + .build()); + // retry propose until success, the quorum must be hold to process + return AsyncRetry.exec(proposeCancelTask, (index, e) -> e != null, + Duration.ofSeconds(5).toNanos(), Long.MAX_VALUE) + .thenComposeAsync((index) -> wal.once(kvRange.currentLastAppliedIndex(), (logEntry) -> { + if (logEntry.hasData()) { + try { + KVRangeCommand command = ZeroCopyParser.parse(logEntry.getData(), KVRangeCommand.parser()); + if (command.hasMergeDone() || command.hasCancelMerging()) { + return true; + } + } catch (Throwable t) { + // should not happen + log.error("Failed to parse logEntry", t); + } + } + return false; + }, fsmExecutor), fsmExecutor) + .thenApply(logEntry -> { + try { + KVRangeCommand command = ZeroCopyParser.parse(logEntry.getData(), KVRangeCommand.parser()); + return command.hasCancelMerging() ? 
TryCancelMergingResult.CANCELLED : + TryCancelMergingResult.ALREADY_MERGED; + } catch (Throwable t) { + throw new KVRangeException("Should never happen", t); + } + }); + } + + private CompletableFuture tryConfirmMerged(String taskId, long ver) { + Supplier> proposeCancelTask = () -> wal.propose(KVRangeCommand.newBuilder() + .setTaskId(taskId) + .setVer(ver) + .setMergeDone(MergeDone.newBuilder().setStoreId(hostStoreId).build()) + .build()); + // retry propose until success, the quorum must be hold to process + return AsyncRetry.exec(proposeCancelTask, (index, e) -> e != null, + Duration.ofSeconds(5).toNanos(), Long.MAX_VALUE) + .thenComposeAsync((index) -> wal.once(kvRange.currentLastAppliedIndex(), (logEntry) -> { + if (logEntry.hasData()) { + try { + KVRangeCommand command = ZeroCopyParser.parse(logEntry.getData(), KVRangeCommand.parser()); + if (command.hasMergeDone() || command.hasCancelMerging()) { + return true; + } + } catch (Throwable t) { + // should not happen + log.error("Failed to parse logEntry", t); + } + } + return false; + }, fsmExecutor), fsmExecutor) + .thenApply(logEntry -> { + try { + KVRangeCommand command = ZeroCopyParser.parse(logEntry.getData(), KVRangeCommand.parser()); + return command.hasCancelMerging() ? 
TryConfirmMergedResult.ALREADY_CANCELED : + TryConfirmMergedResult.MERGED; + } catch (Throwable t) { + throw new KVRangeException("Should never happen", t); + } + }); + } + + private CompletableFuture trySendPrepareMergeToRequest(String taskId, + KVRangeId mergeeId, + long mergerVer, + Boundary mergerBoundary, + List mergeeVoters, + ClusterConfig mergerConfig) { + Supplier> sendTask = () -> { + String mergeeVoter = randomPickOne(mergeeVoters, mergerConfig.getVotersList()); + CompletableFuture replyFuture = messenger.once(m -> m.hasPrepareMergeToReply() + && m.getRangeId().equals(mergeeId) + && m.getPrepareMergeToReply().getTaskId().equals(taskId)) + .orTimeout(1, TimeUnit.SECONDS) + .thenApply(KVRangeMessage::getPrepareMergeToReply) + .exceptionally(e -> PrepareMergeToReply.newBuilder() + .setTaskId(taskId) + .setAccept(false) + .build()); + log.debug("Send PrepareMergeTo request: mergeeId={}, mergeeStore={}", KVRangeIdUtil.toString(mergeeId), + mergeeVoter); + messenger.send(KVRangeMessage.newBuilder() + .setHostStoreId(mergeeVoter) + .setRangeId(mergeeId) + .setPrepareMergeToRequest(PrepareMergeToRequest.newBuilder() + .setTaskId(taskId) + .setId(id) + .setVer(VerUtil.bump(mergerVer, false)) + .setBoundary(mergerBoundary) + .setConfig(mergerConfig) + .build()) + .build()); + return replyFuture; + }; + return AsyncRetry.exec(sendTask, (reply, e) -> !reply.getAccept(), + Duration.ofSeconds(opts.getMergeTimeoutSec() / 2).toNanos(), + Duration.ofSeconds(opts.getMergeTimeoutSec()).toNanos()) + .handle((reply, e) -> { + if (e != null) { + log.warn("Failed to send PrepareMergeTo request: mergeeId={}", KVRangeIdUtil.toString(mergeeId), e); + } else if (!reply.getAccept()) { + log.debug("Mergee rejected PrepareMergeTo request: mergeeId={} ", KVRangeIdUtil.toString(mergeeId)); + } else { + log.debug("Mergee accepted PrepareMergeTo request: mergeeId={} ", KVRangeIdUtil.toString(mergeeId)); + } + return null; + }); + } + + private CompletableFuture 
trySendMergeDoneRequest(String taskId, + KVRangeId mergeeId, + long mergeeVer, + List mergeeVoters, + List mergerVoters) { + Supplier> sendTask = () -> { + String mergeeVoter = randomPickOne(mergeeVoters, mergerVoters); + CompletableFuture replyFuture = messenger.once(m -> m.hasMergeDoneReply() + && m.getRangeId().equals(mergeeId) + && m.getMergeDoneReply().getTaskId().equals(taskId)) + .thenApply(KVRangeMessage::getMergeDoneReply) + .orTimeout(1, TimeUnit.SECONDS) + .exceptionally(e -> MergeDoneReply.newBuilder().setTaskId(taskId).setAccept(false).build()); + log.debug("Send MergeDone request: mergeeId={}, mergeeStore={}", + KVRangeIdUtil.toString(mergeeId), mergeeVoter); + messenger.send(KVRangeMessage.newBuilder() + .setRangeId(mergeeId) + .setHostStoreId(mergeeVoter) + .setMergeDoneRequest(MergeDoneRequest.newBuilder() + .setId(id) + .setTaskId(taskId) + .setMergeeVer(mergeeVer) + .setStoreId(hostStoreId) + .build()) + .build()); + return replyFuture; + }; + return AsyncRetry.exec(sendTask, (reply, e) -> !reply.getAccept(), + Duration.ofSeconds(opts.getMergeTimeoutSec() / 2).toNanos(), + Duration.ofSeconds(opts.getMergeTimeoutSec()).toNanos()) + .handle((reply, e) -> { + if (e != null) { + log.warn("Failed to send MergeDone request: mergeeId={}", KVRangeIdUtil.toString(mergeeId), e); + } else if (!reply.getAccept()) { + log.debug("Mergee rejected MergeDone request: mergeeId={} ", KVRangeIdUtil.toString(mergeeId)); + } else { + log.debug("Mergee accepted MergeDone request: mergeeId={} ", KVRangeIdUtil.toString(mergeeId)); + } + return null; + }); + } + + private CompletableFuture trySendCancelMergingRequest(String taskId, + KVRangeId remoteRangeId, + long remoteRangeVer, + List remoteRangeVoters, + List localVoters) { + Supplier> sendCancelRequestTask = () -> { + String mergerVoter = randomPickOne(remoteRangeVoters, localVoters); + CompletableFuture cancelReplyFuture = messenger.once(m -> m.hasCancelMergingReply() + && m.getRangeId().equals(remoteRangeId) + && 
m.getCancelMergingReply().getTaskId().equals(taskId)) + .orTimeout(1, TimeUnit.SECONDS) + .thenApply(KVRangeMessage::getCancelMergingReply) + .exceptionally(v -> CancelMergingReply.newBuilder().setTaskId(taskId).setAccept(false).build()); + log.debug("Send CancelMerging request: remoteRangeId={}, storeId={}", KVRangeIdUtil.toString(remoteRangeId), + mergerVoter); + messenger.send(KVRangeMessage.newBuilder() + .setRangeId(remoteRangeId) + .setHostStoreId(mergerVoter) + .setCancelMergingRequest(CancelMergingRequest.newBuilder() + .setTaskId(taskId) + .setVer(remoteRangeVer) + .setRequester(id) + .build()) + .build()); + return cancelReplyFuture; + }; + return AsyncRetry.exec(sendCancelRequestTask, (reply, e) -> !reply.getAccept(), + Duration.ofSeconds(opts.getMergeTimeoutSec() / 2).toNanos(), + Duration.ofSeconds(opts.getMergeTimeoutSec()).toNanos()) + .handle((reply, e) -> { + if (e != null) { + log.warn("Failed to send CancelMerging request: remoteRangeId={}", + KVRangeIdUtil.toString(remoteRangeId), e); + } else if (!reply.getAccept()) { + log.debug("Mergee rejected CancelMerging request: remoteRangeId={} ", + KVRangeIdUtil.toString(remoteRangeId)); + } else { + log.debug("Mergee accepted CancelMerging request: remoteRangeId={} ", + KVRangeIdUtil.toString(remoteRangeId)); + } + return null; + }); + } + + private CompletableFuture trySendMergeRequest(String taskId, + KVRangeId mergerId, + long mergerVer, + long mergeeVer, + Boundary boundary, + List mergerVoters, + ClusterConfig mergeeConfig) { + Supplier> sendMergeRequestTask = () -> { + String mergerVoter = randomPickOne(mergerVoters, mergeeConfig.getVotersList()); + CompletableFuture replyFuture = messenger.once(m -> m.hasMergeReply() + && m.getRangeId().equals(mergerId) + && m.getMergeReply().getTaskId().equals(taskId)) + .orTimeout(1, TimeUnit.SECONDS) + .thenApply(KVRangeMessage::getMergeReply) + .exceptionally(e -> MergeReply.newBuilder().setTaskId(taskId).setAccept(false).build()); + log.debug("Send Merge 
request: mergerId={}, storeId={}", KVRangeIdUtil.toString(mergerId), mergerVoter); + messenger.send(KVRangeMessage.newBuilder() + .setHostStoreId(mergerVoter) + .setRangeId(mergerId) + .setMergeRequest(MergeRequest.newBuilder() + .setTaskId(taskId) + .setVer(mergerVer) + .setMergeeId(id) + .setMergeeVer(VerUtil.bump(mergeeVer, false)) + .setStoreId(hostStoreId) + .setBoundary(boundary) + .setConfig(mergeeConfig) + .build()) + .build()); + return replyFuture; + }; + return AsyncRetry.exec(sendMergeRequestTask, (reply, e) -> !reply.getAccept(), + Duration.ofSeconds(opts.getMergeTimeoutSec() / 2).toNanos(), + Duration.ofSeconds(opts.getMergeTimeoutSec()).toNanos()) + .handle((reply, e) -> { + if (e != null) { + log.warn("Failed to send Merge request: mergerId={}", KVRangeIdUtil.toString(mergerId), e); + } else if (!reply.getAccept()) { + log.debug("Mergee rejected Merge request: mergerId={} ", KVRangeIdUtil.toString(mergerId)); + } else { + log.debug("Merger accepted Merge request: mergerId={} ", KVRangeIdUtil.toString(mergerId)); + } + return null; + }); + } + + private void resetHinterAndCoProc(Boundary boundary) { + try { + splitHinters.forEach(hinter -> hinter.reset(boundary)); + factSubject.onNext(reset(boundary)); + } catch (Throwable ex) { + log.error("Failed to reset hinter or coProc after boundary change", ex); + } + } + private boolean isGracefulQuit(ClusterConfig currentConfig, ChangeConfig nextConfig) { return Set.of(hostStoreId).containsAll(currentConfig.getVotersList()) && currentConfig.getLearnersCount() == 0 @@ -1542,7 +1947,6 @@ private CompletableFuture restore(KVRangeSnapshot snapshot, if (e != null) { if (e instanceof SnapshotException.ObsoleteSnapshotException) { log.debug("Obsolete snapshot, reset kvRange to latest snapshot: \n{}", snapshot); - kvRange.toReseter(wal.latestSnapshot()).done(); } } else { linearizer.afterLogApplied(snapshot.getLastAppliedIndex()); @@ -1572,20 +1976,19 @@ private void shrinkWAL() { return; } 
lastShrinkCheckAt.set(now); - if (kvRange.lastAppliedIndex() - wal.latestSnapshot().getLastAppliedIndex() - < opts.getCompactWALThreshold()) { + if (wal.logDataSize() < opts.getCompactWALThreshold()) { shrinkingWAL.set(false); return; } mgmtTaskRunner.add(() -> { - if (isNotOpening() || kvRange.state().getType() == ConfigChanging) { + if (isNotOpening() || kvRange.currentState().getType() == ConfigChanging) { // don't let compaction interferes with config changing process shrinkingWAL.set(false); return CompletableFuture.completedFuture(null); } KVRangeSnapshot latestSnapshot = wal.latestSnapshot(); - long lastAppliedIndex = kvRange.lastAppliedIndex(); - if (lastAppliedIndex - latestSnapshot.getLastAppliedIndex() < opts.getCompactWALThreshold()) { + long lastAppliedIndex = kvRange.currentLastAppliedIndex(); + if (wal.logDataSize() < opts.getCompactWALThreshold()) { shrinkingWAL.set(false); return CompletableFuture.completedFuture(null); } @@ -1633,7 +2036,8 @@ private void detectZombieState(KVRangeDescriptor descriptor) { } } - private void checkAndRepairFromZombieState() { + private void judgeZombieState() { + CompletableFuture checkFuture = quitZombie.getAndSet(null); if (zombieAt > 0 && Duration.ofMillis(HLC.INST.getPhysical() - zombieAt).toSeconds() > opts.getZombieTimeoutSec()) { ClusterConfig clusterConfig = wal.latestClusterConfig(); @@ -1643,13 +2047,72 @@ private void checkAndRepairFromZombieState() { if (recovering.compareAndSet(false, true)) { log.info("Recovering from lost quorum during changing config from single voter: \n{}", clusterConfig); - wal.recover().whenComplete((v, e) -> recovering.set(false)); + wal.recover().whenComplete((v, e) -> { + recovering.set(false); + checkFuture.complete(false); + }); + } else { + checkFuture.complete(false); + } + } else { + if (checkFuture != null) { + log.info("Zombie state detected, send quit signal."); + quitSignal.complete(false); + checkFuture.complete(true); } - } else if 
(!clusterConfig.getVotersList().contains(hostStoreId) - && !clusterConfig.getLearnersList().contains(hostStoreId)) { - log.info("Zombie state detected, send quit signal."); - quitSignal.complete(null); } + } else if (checkFuture != null) { + checkFuture.complete(false); + } + } + + private void checkMergeTimeout() { + State state = kvRange.currentState(); + switch (state.getType()) { + case PreparedMerging -> { + if (mergePendingAt < 0) { + mergePendingAt = System.nanoTime(); + } else { + boolean timeout = Duration.ofSeconds(opts.getMergeTimeoutSec()) + .compareTo(Duration.ofNanos(System.nanoTime() - mergePendingAt)) <= 0; + if (timeout) { + if (wal.isLeader() && cancelingMerge.compareAndSet(false, true)) { + log.debug("Merge timeout, auto cancel merging: mergeeId={}", KVRangeIdUtil.toString(id)); + wal.propose(KVRangeCommand.newBuilder() + .setTaskId(state.getTaskId()) + .setVer(kvRange.currentVer()) + .setCancelMerging(CancelMerging.newBuilder().build()) + .build()) + .thenRun(() -> mergePendingAt = -1) + .whenComplete((logIdx, e) -> cancelingMerge.set(false)); + } + } + } + } + case WaitingForMerge -> { + if (mergePendingAt < 0) { + mergePendingAt = System.nanoTime(); + } else { + boolean timeout = Duration.ofSeconds(opts.getMergeTimeoutSec()) + .compareTo(Duration.ofNanos(System.nanoTime() - mergePendingAt)) <= 0; + if (timeout) { + if (wal.isLeader()) { + log.debug("Merge timeout, broadcast merge help: mergeeId={}", KVRangeIdUtil.toString(id)); + messenger.send(KVRangeMessage.newBuilder() + .setMergeHelpRequest(MergeHelpRequest.newBuilder() + .setTaskId(state.getTaskId()) + .setMergeeId(id) + .setVer(kvRange.currentVer()) + .setBoundary(boundary()) + .setConfig(wal.latestClusterConfig()) + .build()) + .build()); + mergePendingAt = -1; + } + } + } + } + default -> mergePendingAt = -1; } } @@ -1658,20 +2121,6 @@ private boolean isNotOpening() { return state != Open; } - private boolean isCompatible(ClusterConfig config1, ClusterConfig config2) { - // both merger 
and mergee are not in config change process - Set voterSet1 = new HashSet<>(config1.getVotersList()); - Set voterSet2 = new HashSet<>(config2.getVotersList()); - Set learnerSet1 = new HashSet<>(config1.getLearnersList()); - Set learnerSet2 = new HashSet<>(config2.getLearnersList()); - return voterSet1.equals(voterSet2) - && learnerSet1.equals(learnerSet2) - && config1.getNextVotersList().isEmpty() - && config2.getNextVotersList().isEmpty() - && config1.getNextLearnersList().isEmpty() - && config2.getNextLearnersList().isEmpty(); - } - private void handleMessage(KVRangeMessage message) { switch (message.getPayloadTypeCase()) { case WALRAFTMESSAGES -> @@ -1684,6 +2133,8 @@ private void handleMessage(KVRangeMessage message) { case CANCELMERGINGREQUEST -> handleCancelMergingRequest(message.getHostStoreId(), message.getCancelMergingRequest()); case MERGEDONEREQUEST -> handleMergeDoneRequest(message.getHostStoreId(), message.getMergeDoneRequest()); + case DATAMERGEREQUEST -> handleDataMergeRequest(message.getHostStoreId(), message.getDataMergeRequest()); + case MERGEHELPREQUEST -> handleMergeHelpRequest(message.getHostStoreId(), message.getMergeHelpRequest()); default -> { // do nothing } @@ -1697,67 +2148,48 @@ private void handleWALMessages(String peerId, List messages) { private void handleSnapshotSyncRequest(String follower, SnapshotSyncRequest request) { log.info("Dumping snapshot: session={}: follower={}\n{}", request.getSessionId(), follower, request.getSnapshot()); - KVRangeDumpSession session = new KVRangeDumpSession(follower, request, kvRange, messenger, - Duration.ofSeconds(opts.getSnapshotSyncIdleTimeoutSec()), - opts.getSnapshotSyncBytesPerSec(), snapshotBandwidthGovernor, metricManager::reportDump, tags); - dumpSessions.put(session.id(), session); - session.awaitDone().whenComplete((result, e) -> { - switch (result) { - case OK -> log.info("Snapshot dumped: session={}, follower={}", session.id(), follower); - case Canceled -> log.info("Snapshot dump 
canceled: session={}, follower={}", session.id(), follower); - case NoCheckpoint -> { - log.info("No checkpoint found, compact WAL now"); - compactWAL(); - } - case Abort -> log.info("Snapshot dump aborted: session={}, follower={}", session.id(), follower); - case Error -> log.warn("Snapshot dump failed: session={}, follower={}", session.id(), follower); - default -> { - // do nothing - } - } - dumpSessions.remove(session.id(), session); - }); + startDumpSession(request.getSessionId(), request.getSnapshot(), request.getSnapshot().getId(), follower); } private void handlePrepareMergeToRequest(String peer, PrepareMergeToRequest request) { log.debug("Handle PrepareMergeTo request \n{}", request); // I'm the mergee - descriptorSubject.firstElement() - .timeout(5, TimeUnit.SECONDS) - .subscribe(rangeDescriptor -> - wal.propose(KVRangeCommand.newBuilder() - .setTaskId(request.getTaskId()) - .setVer(rangeDescriptor.getVer()) - .setPrepareMergeTo(PrepareMergeTo.newBuilder() - .setMergerId(request.getId()) - .setMergerVer(request.getVer()) - .setBoundary(request.getBoundary()) - .setConfig(request.getConfig()) - .build()) - .build()) - .whenCompleteAsync((v, e) -> { - if (e != null) { - log.debug("Failed to propose command[PrepareMergeTo]: \n{}", request, e); - } else { - log.debug("Command[PrepareMergeTo] proposed: index={}\n{}", v, request); - } - messenger.send(KVRangeMessage.newBuilder() - .setRangeId(request.getId()) - .setHostStoreId(peer) - .setPrepareMergeToReply(PrepareMergeToReply.newBuilder() - .setTaskId(request.getTaskId()) - .setAccept(e == null) - .build()) - .build()); - }, fsmExecutor), - e -> messenger.send(KVRangeMessage.newBuilder() - .setRangeId(request.getId()) - .setHostStoreId(peer) - .setPrepareMergeToReply(PrepareMergeToReply.newBuilder() + AtomicReference disposableRef = new AtomicReference<>(); + disposables.add(descriptorSubject.firstElement() + .observeOn(Schedulers.from(mgmtExecutor)) + .doOnSubscribe(disposableRef::set) + .doOnDispose(() -> { 
+ Disposable disposable = disposableRef.get(); + if (disposable != null) { + disposables.delete(disposableRef.get()); + } + }) + .subscribe(latestDesc -> + wal.propose(KVRangeCommand.newBuilder() .setTaskId(request.getTaskId()) - .setAccept(false) + .setVer(latestDesc.getVer()) // use current fsm ver, may be mismatched when applied + .setPrepareMergeTo(PrepareMergeTo.newBuilder() + .setMergerId(request.getId()) + .setMergerVer(request.getVer()) + .setBoundary(request.getBoundary()) + .setConfig(request.getConfig()) + .build()) .build()) - .build())); + .whenCompleteAsync((proposalIndex, e) -> { + if (e != null) { + log.debug("Failed to propose command[PrepareMergeTo]: \n{}", request, e); + } else { + log.debug("Command[PrepareMergeTo] proposed: index={}\n{}", proposalIndex, request); + } + messenger.send(KVRangeMessage.newBuilder() + .setRangeId(request.getId()) + .setHostStoreId(peer) + .setPrepareMergeToReply(PrepareMergeToReply.newBuilder() + .setTaskId(request.getTaskId()) + .setAccept(e == null) + .build()) + .build()); + }, fsmExecutor))); } private void handleMergeRequest(String peer, MergeRequest request) { @@ -1770,6 +2202,7 @@ private void handleMergeRequest(String peer, MergeRequest request) { .setMergeeVer(request.getMergeeVer()) .setBoundary(request.getBoundary()) .setStoreId(request.getStoreId()) + .setConfig(request.getConfig()) .build()) .build()) .whenCompleteAsync((v, e) -> { @@ -1840,6 +2273,97 @@ private void handleMergeDoneRequest(String peer, MergeDoneRequest request) { }, fsmExecutor); } + private void handleDataMergeRequest(String peer, DataMergeRequest request) { + log.debug("Handle DataMerge request: \n{}", request); + AtomicReference disposableRef = new AtomicReference<>(); + disposables.add(descriptorSubject + .filter(desc -> desc.getState() == WaitingForMerge) + .firstElement() + .observeOn(Schedulers.from(mgmtExecutor)) + .doOnSubscribe(disposableRef::set) + .doOnDispose(() -> { + Disposable disposable = disposableRef.get(); + if 
(disposable != null) { + disposables.delete(disposableRef.get()); + } + }) + .subscribe(desc -> { + KVRangeSnapshot checkpoint = kvRange.checkpoint(); + startDumpSession(request.getSessionId(), checkpoint, request.getMergerId(), peer); + })); + } + + private void handleMergeHelpRequest(String peer, MergeHelpRequest request) { + log.debug("Handle MergeHelp request: \n{}", request); + KVRangeId mergeeId = request.getMergeeId(); + Boundary mergeeBoundary = request.getBoundary(); + Boundary myBoundary = boundary(); + ByteString myEndKey = endKey(myBoundary); + ByteString mergeeStartKey = startKey(mergeeBoundary); + // handle help request only when I'm in Normal state and the request is in same epoch + if (kvRange.currentState().getType() == Normal && mergeeId.getEpoch() == id.getEpoch()) { + if (Objects.equals(myEndKey, mergeeStartKey)) { + log.debug("help mergee cancel: mergeeId={}", KVRangeIdUtil.toString(request.getMergeeId())); + messenger.send(KVRangeMessage.newBuilder() + .setRangeId(mergeeId) + .setHostStoreId(peer) + .setCancelMergingRequest(CancelMergingRequest.newBuilder() + .setTaskId(request.getTaskId()) + .setVer(request.getVer()) + .setRequester(id) + .build()) + .build()); + } else if (BoundaryUtil.inRange(mergeeBoundary, myBoundary)) { + log.debug("help mergee finish: mergeeId={}", KVRangeIdUtil.toString(request.getMergeeId())); + messenger.send(KVRangeMessage.newBuilder() + .setRangeId(mergeeId) + .setHostStoreId(peer) + .setMergeDoneRequest(MergeDoneRequest.newBuilder() + .setId(id) + .setTaskId(request.getTaskId()) + .setMergeeVer(request.getVer()) + .setStoreId(hostStoreId) + .build()) + .build()); + } + } + } + + private void startDumpSession(String sessionId, + KVRangeSnapshot snapshot, + KVRangeId targetRangeId, + String targetStoreId) { + KVRangeDumpSession session = new KVRangeDumpSession(sessionId, snapshot, targetRangeId, targetStoreId, + kvRange, messenger, Duration.ofSeconds(opts.getSnapshotSyncIdleTimeoutSec()), + 
opts.getSnapshotSyncBytesPerSec(), + snapshotBandwidthGovernor, + bytes -> { + // reset merge timeout timer if there is a live data dump activity + if (kvRange.currentState().getType() == WaitingForMerge) { + mergePendingAt = -1; + } + metricManager.reportDump(bytes); + }, tags); + dumpSessions.put(session.id(), session); + session.awaitDone().whenComplete((result, e) -> { + switch (result) { + case OK -> log.info("Snapshot dumped: session={}, follower={}", session.id(), targetStoreId); + case Canceled -> + log.info("Snapshot dump canceled: session={}, follower={}", session.id(), targetStoreId); + case NoCheckpoint -> { + log.info("No checkpoint found, compact WAL now"); + compactWAL(); + } + case Abort -> log.info("Snapshot dump aborted: session={}, follower={}", session.id(), targetStoreId); + case Error -> log.warn("Snapshot dump failed: session={}, follower={}", session.id(), targetStoreId); + default -> { + // do nothing + } + } + dumpSessions.remove(session.id(), session); + }); + } + private Any reset(Boundary boundary) { long startAt = System.nanoTime(); long stamp = resetLock.writeLock(); @@ -1853,6 +2377,18 @@ private Any reset(Boundary boundary) { } } + private enum TryMigrateResult { + SUCCESS_AFTER_RESET, SUCCESS, FAILED + } + + private enum TryCancelMergingResult { + CANCELLED, ALREADY_MERGED + } + + private enum TryConfirmMergedResult { + ALREADY_CANCELED, MERGED + } + enum Lifecycle { Init, // initialized but not open Opening, @@ -1867,6 +2403,6 @@ enum Lifecycle { * Callback for listening the quit signal which generated as the result of config change operation. 
*/ public interface QuitListener { - void onQuit(IKVRangeFSM rangeToQuit); + void onQuit(IKVRangeFSM rangeToQuit, boolean reset); } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFactory.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFactory.java new file mode 100644 index 000000000..8b4a73bda --- /dev/null +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFactory.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.store.range; + +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.KVRangeSnapshot; + +public class KVRangeFactory { + public static IKVRange create(KVRangeId id, ICPableKVSpace kvSpace, String... tags) { + return new KVRange(id, kvSpace, tags); + } + + public static IKVRange create(KVRangeId id, ICPableKVSpace kvSpace, KVRangeSnapshot snapshot, String... 
tags) { + return new KVRange(id, kvSpace, snapshot, tags); + } +} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeKeys.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeKeys.java index f19487b3b..ee592e5d8 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeKeys.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeKeys.java @@ -23,10 +23,10 @@ import com.google.protobuf.ByteString; -public class KVRangeKeys { - public static final ByteString METADATA_VER_BYTES = unsafeWrap(new byte[] {0x00}); - public static final ByteString METADATA_RANGE_BOUND_BYTES = unsafeWrap(new byte[] {0x01}); - public static final ByteString METADATA_LAST_APPLIED_INDEX_BYTES = unsafeWrap(new byte[] {0x02}); - public static final ByteString METADATA_STATE_BYTES = unsafeWrap(new byte[] {0x03}); - public static final ByteString METADATA_CLUSTER_CONFIG_BYTES = unsafeWrap(new byte[] {0x04}); +class KVRangeKeys { + static final ByteString METADATA_VER_BYTES = unsafeWrap(new byte[] {0x00}); + static final ByteString METADATA_RANGE_BOUND_BYTES = unsafeWrap(new byte[] {0x01}); + static final ByteString METADATA_LAST_APPLIED_INDEX_BYTES = unsafeWrap(new byte[] {0x02}); + static final ByteString METADATA_STATE_BYTES = unsafeWrap(new byte[] {0x03}); + static final ByteString METADATA_CLUSTER_CONFIG_BYTES = unsafeWrap(new byte[] {0x04}); } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeMetricManager.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeMetricManager.java index e44f878c1..64c38210d 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeMetricManager.java +++ 
b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeMetricManager.java @@ -14,17 +14,11 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range; -import org.apache.bifromq.basekv.proto.KVRangeDescriptor; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.proto.State; -import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; -import org.apache.bifromq.basekv.store.proto.RWCoProcOutput; -import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import com.google.protobuf.ByteString; import io.micrometer.core.instrument.DistributionSummary; import io.micrometer.core.instrument.Gauge; @@ -36,6 +30,12 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.State; +import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; +import org.apache.bifromq.basekv.store.proto.RWCoProcOutput; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; class KVRangeMetricManager implements IKVRangeMetricManager { private final DistributionSummary dumpBytesSummary; @@ -55,6 +55,7 @@ class KVRangeMetricManager implements IKVRangeMetricManager { private final Timer existTimer; private final Timer getTimer; private final Timer queryCoProcTimer; + private final Timer linearizerTimer; private final Timer compactionTimer; private final Timer applyLogTimer; private final Timer installSnapshotTimer; @@ -67,43 +68,47 @@ class KVRangeMetricManager implements IKVRangeMetricManager { .and("rangeId", KVRangeIdUtil.toString(rangeId)); dumpBytesSummary = Metrics.summary("basekv.snap.dump", 
tags); restoreBytesSummary = Metrics.summary("basekv.snap.restore", tags); - stateGauge = Gauge.builder("basekv.meta.state", () -> { - KVRangeDescriptor desc = currentDesc.get(); - if (desc != null) { - return desc.getState().ordinal(); - } - return State.StateType.NoUse.ordinal(); - }) + stateGauge = Gauge.builder("basekv.meta.state", + () -> { + KVRangeDescriptor desc = currentDesc.get(); + if (desc != null) { + return desc.getState().ordinal(); + } + return State.StateType.NoUse.ordinal(); + }) .tags(tags) .register(Metrics.globalRegistry); - verGauge = Gauge.builder("basekv.meta.ver", () -> { - KVRangeDescriptor desc = currentDesc.get(); - if (desc != null) { - return desc.getVer(); - } - return -1; - }) + verGauge = Gauge.builder("basekv.meta.ver", + () -> { + KVRangeDescriptor desc = currentDesc.get(); + if (desc != null) { + return desc.getVer(); + } + return -1; + }) .tags(tags) .register(Metrics.globalRegistry); lastAppliedIndexGauge = Gauge.builder("basekv.meta.appidx", currentLastAppliedIndex::get) .tags(tags) .register(Metrics.globalRegistry); - dataSizeGauge = Gauge.builder("basekv.meta.size", () -> { - KVRangeDescriptor desc = currentDesc.get(); - if (desc != null) { - return desc.getStatisticsMap().getOrDefault("dataSize", 0.0).longValue(); - } - return 0; - }) + dataSizeGauge = Gauge.builder("basekv.meta.size", + () -> { + KVRangeDescriptor desc = currentDesc.get(); + if (desc != null) { + return desc.getStatisticsMap().getOrDefault("dataSize", 0.0).longValue(); + } + return 0; + }) .tags(tags) .register(Metrics.globalRegistry); - walSizeGauge = Gauge.builder("basekv.meta.walsize", () -> { - KVRangeDescriptor desc = currentDesc.get(); - if (desc != null) { - return desc.getStatisticsMap().getOrDefault("walSize", 0.0).longValue(); - } - return 0; - }) + walSizeGauge = Gauge.builder("basekv.meta.walsize", + () -> { + KVRangeDescriptor desc = currentDesc.get(); + if (desc != null) { + return desc.getStatisticsMap().getOrDefault("walSize", 
0.0).longValue(); + } + return 0; + }) .tags(tags) .register(Metrics.globalRegistry); configChangeTimer = Timer.builder("basekv.cmd.configchange") @@ -136,6 +141,9 @@ class KVRangeMetricManager implements IKVRangeMetricManager { queryCoProcTimer = Timer.builder("basekv.cmd.querycoproc") .tags(tags) .register(Metrics.globalRegistry); + linearizerTimer = Timer.builder("basekv.cmd.linear") + .tags(tags) + .register(Metrics.globalRegistry); compactionTimer = Timer.builder("basekv.cmd.compact") .tags(tags) .register(Metrics.globalRegistry); @@ -226,6 +234,11 @@ public CompletableFuture recordQueryCoProc(Supplier recordLinearization(Supplier> supplier) { + return recordDuration(supplier, linearizerTimer); + } + @Override public CompletableFuture recordCompact(Supplier> supplier) { return recordDuration(supplier, compactionTimer); @@ -259,6 +272,7 @@ void close() { Metrics.globalRegistry.removeByPreFilterId(existTimer.getId()); Metrics.globalRegistry.removeByPreFilterId(getTimer.getId()); Metrics.globalRegistry.removeByPreFilterId(queryCoProcTimer.getId()); + Metrics.globalRegistry.removeByPreFilterId(linearizerTimer.getId()); Metrics.globalRegistry.removeByPreFilterId(compactionTimer.getId()); Metrics.globalRegistry.removeByPreFilterId(applyLogTimer.getId()); Metrics.globalRegistry.removeByPreFilterId(installSnapshotTimer.getId()); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeQueryLinearizer.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeQueryLinearizer.java index 848e25143..58219c0e3 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeQueryLinearizer.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeQueryLinearizer.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. 
See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range; @@ -26,6 +26,7 @@ import java.util.concurrent.ConcurrentMap; import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Function; import java.util.function.Supplier; import lombok.AllArgsConstructor; import org.apache.bifromq.logger.MDCLogger; @@ -36,13 +37,18 @@ class KVRangeQueryLinearizer implements IKVRangeQueryLinearizer { private final ConcurrentMap, CompletableFuture> readIndexes = Maps.newConcurrentMap(); private final ConcurrentLinkedDeque toBeLinearized = new ConcurrentLinkedDeque<>(); private final Supplier> readIndexProvider; + private final Function>, CompletableFuture> recordDuration; private final Executor executor; private final AtomicBoolean linearizing = new AtomicBoolean(); private volatile long lastAppliedIndex = 0; - KVRangeQueryLinearizer(Supplier> readIndexProvider, Executor executor, - long lastAppliedIndex, String... tags) { + KVRangeQueryLinearizer(Supplier> readIndexProvider, + Executor executor, + long lastAppliedIndex, + Function>, CompletableFuture> recordDuration, + String... 
tags) { this.readIndexProvider = readIndexProvider; + this.recordDuration = recordDuration; this.executor = executor; this.lastAppliedIndex = lastAppliedIndex; this.log = MDCLogger.getLogger(KVRangeQueryLinearizer.class, tags); @@ -50,26 +56,28 @@ class KVRangeQueryLinearizer implements IKVRangeQueryLinearizer { @Override public CompletionStage linearize() { - CompletableFuture onDone = new CompletableFuture<>(); - CompletableFuture readIndex = readIndexProvider.get(); - readIndexes.put(readIndex, onDone); - readIndex.whenCompleteAsync((ri, e) -> { - if (e != null) { - log.debug("failed to get readIndex", e); - readIndexes.remove(readIndex).completeExceptionally(e); - } else { - if (ri <= lastAppliedIndex) { - readIndexes.remove(readIndex).complete(null); + return recordDuration.apply(() -> { + CompletableFuture onDone = new CompletableFuture<>(); + CompletableFuture readIndex = readIndexProvider.get(); + readIndexes.put(readIndex, onDone); + readIndex.whenCompleteAsync((ri, e) -> { + if (e != null) { + log.debug("failed to get readIndex", e); + readIndexes.remove(readIndex).completeExceptionally(e); } else { - readIndexes.remove(readIndex, onDone); - if (!onDone.isDone()) { - toBeLinearized.add(new ToLinearize(ri, onDone)); - schedule(); + if (ri <= lastAppliedIndex) { + readIndexes.remove(readIndex).complete(null); + } else { + readIndexes.remove(readIndex, onDone); + if (!onDone.isDone()) { + toBeLinearized.add(new ToLinearize(ri, onDone)); + schedule(); + } } } - } - }, executor); - return onDone; + }, executor); + return onDone; + }); } @Override diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeQueryRunner.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeQueryRunner.java index 3c5005934..80bb78fc7 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeQueryRunner.java +++ 
b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeQueryRunner.java @@ -25,6 +25,7 @@ import static org.apache.bifromq.basekv.proto.State.StateType.ToBePurged; import static org.apache.bifromq.basekv.store.util.VerUtil.boundaryCompatible; +import com.google.common.base.Preconditions; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import java.util.List; @@ -37,13 +38,14 @@ import java.util.function.Supplier; import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.State; -import org.apache.bifromq.basekv.store.api.IKVLoadRecord; import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; -import org.apache.bifromq.basekv.store.api.IKVRangeSplitHinter; -import org.apache.bifromq.basekv.store.api.IKVReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.basekv.store.exception.KVRangeException; import org.apache.bifromq.basekv.store.proto.ROCoProcInput; import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; +import org.apache.bifromq.basekv.store.range.hinter.IKVLoadRecord; +import org.apache.bifromq.basekv.store.range.hinter.IKVRangeSplitHinter; +import org.apache.bifromq.basekv.utils.BoundaryUtil; import org.apache.bifromq.logger.MDCLogger; import org.slf4j.Logger; @@ -80,19 +82,25 @@ class KVRangeQueryRunner implements IKVRangeQueryRunner { // Execute a ROCommand @Override public CompletableFuture exist(long ver, ByteString key, boolean linearized) { - return submit(ver, rangeReader -> completedFuture(rangeReader.exist(key)), linearized); + return submit(ver, rangeReader -> { + Preconditions.checkArgument(BoundaryUtil.inRange(key, rangeReader.boundary())); + return completedFuture(rangeReader.exist(key)); + }, linearized); } @Override public CompletableFuture> get(long ver, ByteString key, boolean linearized) { - return submit(ver, rangeReader -> completedFuture(rangeReader.get(key)), linearized); + 
return submit(ver, rangeReader -> { + Preconditions.checkArgument(BoundaryUtil.inRange(key, rangeReader.boundary())); + return completedFuture(rangeReader.get(key)); + }, linearized); } @Override public CompletableFuture queryCoProc(long ver, ROCoProcInput query, boolean linearized) { return submit(ver, rangeReader -> { IKVLoadRecorder loadRecorder = new KVLoadRecorder(); - IKVReader loadRecordableReader = new LoadRecordableKVReader(rangeReader, loadRecorder); + IKVRangeRefreshableReader loadRecordableReader = new LoadRecordableKVReader(rangeReader, loadRecorder); return coProc.query(query, loadRecordableReader) .whenComplete((v, e) -> { try { @@ -116,7 +124,7 @@ public void close() { private CompletableFuture submit(long ver, QueryFunction queryFn, boolean linearized) { - if (!boundaryCompatible(ver, kvRange.version())) { + if (!boundaryCompatible(ver, kvRange.currentVer())) { return CompletableFuture.failedFuture( new KVRangeException.BadVersion("Version Mismatch", descriptorSupplier.get())); } @@ -164,11 +172,11 @@ private CompletableFuture doQuery(long ver, QueryFuncti return CompletableFuture.failedFuture( new KVRangeException.TryLater("Range is resetting or busy", descriptorSupplier.get())); } - IKVReader dataReader = kvRange.borrowDataReader(); + IKVRangeRefreshableReader refreshableReader = kvRange.newReader(); // return the borrowed reader when future completed - onDone.whenComplete((v, e) -> kvRange.returnDataReader(dataReader)); - State state = kvRange.state(); - if (!boundaryCompatible(ver, kvRange.version())) { + onDone.whenComplete((v, e) -> refreshableReader.close()); + State state = kvRange.currentState(); + if (!boundaryCompatible(ver, kvRange.currentVer())) { queryLock.unlockRead(stamp); onDone.completeExceptionally( new KVRangeException.BadVersion("Version Mismatch", descriptorSupplier.get())); @@ -180,7 +188,8 @@ private CompletableFuture doQuery(long ver, QueryFuncti new KVRangeException.TryLater("Range has been in state: " + 
state.getType().name().toLowerCase())); return onDone; } - return queryFn.apply(dataReader) + refreshableReader.refresh(); + return queryFn.apply(refreshableReader) .whenCompleteAsync((v, e) -> { queryLock.unlockRead(stamp); if (e != null) { @@ -199,6 +208,6 @@ private CompletableFuture doQuery(long ver, QueryFuncti } private interface QueryFunction { - CompletableFuture apply(IKVReader dataReader); + CompletableFuture apply(IKVRangeRefreshableReader rangeReader); } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeReader.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeReader.java new file mode 100644 index 000000000..772c3ad62 --- /dev/null +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeReader.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.store.range; + +import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_LAST_APPLIED_INDEX_BYTES; +import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_STATE_BYTES; +import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_VER_BYTES; + +import com.google.protobuf.ByteString; +import java.util.Optional; +import org.apache.bifromq.basekv.localengine.IKVSpaceReader; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.proto.State; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.store.api.IKVIterator; +import org.apache.bifromq.basekv.store.api.IKVRangeReader; +import org.apache.bifromq.basekv.store.util.KVUtil; + +class KVRangeReader implements IKVRangeReader { + protected final IKVSpaceReader kvSpaceReader; + + public KVRangeReader(IKVSpaceReader spaceReader) { + this.kvSpaceReader = spaceReader; + } + + @Override + public final long version() { + return kvSpaceReader.metadata(METADATA_VER_BYTES).map(KVUtil::toLongNativeOrder).orElse(-1L); + } + + @Override + public final State state() { + return kvSpaceReader.metadata(METADATA_STATE_BYTES) + .map(stateBytes -> { + try { + return State.parseFrom(stateBytes); + } catch (Throwable e) { + return State.newBuilder().setType(State.StateType.NoUse).build(); + } + }) + .orElse(State.newBuilder().setType(State.StateType.NoUse).build()); + } + + @Override + public final long lastAppliedIndex() { + return kvSpaceReader.metadata(METADATA_LAST_APPLIED_INDEX_BYTES).map(KVUtil::toLong).orElse(-1L); + } + + @Override + public final Boundary boundary() { + return kvSpaceReader.metadata(KVRangeKeys.METADATA_RANGE_BOUND_BYTES) + .map(boundaryBytes -> { + try { + return Boundary.parseFrom(boundaryBytes); + } catch (Throwable e) { + return Boundary.getDefaultInstance(); + } + }) + .orElse(Boundary.getDefaultInstance()); + } + + @Override + public final ClusterConfig 
clusterConfig() { + return kvSpaceReader.metadata(KVRangeKeys.METADATA_CLUSTER_CONFIG_BYTES) + .map(clusterConfigBytes -> { + try { + return ClusterConfig.parseFrom(clusterConfigBytes); + } catch (Throwable e) { + return ClusterConfig.getDefaultInstance(); + } + }) + .orElse(ClusterConfig.getDefaultInstance()); + } + + @Override + public final long size(Boundary boundary) { + return kvSpaceReader.size(boundary); + } + + @Override + public final boolean exist(ByteString key) { + return kvSpaceReader.exist(key); + } + + @Override + public final Optional get(ByteString key) { + return kvSpaceReader.get(key); + } + + @Override + public final IKVIterator iterator() { + return new KVIterator(kvSpaceReader.newIterator()); + } + + @Override + public final IKVIterator iterator(Boundary boundary) { + return new KVIterator(kvSpaceReader.newIterator(boundary)); + } + + @Override + public final void close() { + kvSpaceReader.close(); + } +} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVCheckpointDataIterator.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeRefreshableReader.java similarity index 62% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVCheckpointDataIterator.java rename to base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeRefreshableReader.java index c9ec0b915..5a6c5c517 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVCheckpointDataIterator.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeRefreshableReader.java @@ -14,23 +14,24 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store.range; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; -class KVCheckpointDataIterator extends KVIterator implements IKVCheckpointIterator { - private final IKVSpaceIterator kvSpaceIterator; +class KVRangeRefreshableReader extends KVRangeReader implements IKVRangeRefreshableReader { + protected final IKVSpaceRefreshableReader kvSpaceReader; - KVCheckpointDataIterator(IKVSpaceIterator kvSpaceIterator) { - super(kvSpaceIterator); - this.kvSpaceIterator = kvSpaceIterator; + public KVRangeRefreshableReader(IKVSpaceRefreshableReader spaceReader) { + super(spaceReader); + this.kvSpaceReader = spaceReader; } @Override - public void close() { - kvSpaceIterator.close(); + public void refresh() { + kvSpaceReader.refresh(); } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeMetadataWriter.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeRestoreSession.java similarity index 51% rename from base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeMetadataWriter.java rename to base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeRestoreSession.java index 13c08dbfb..2d424d334 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeMetadataWriter.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeRestoreSession.java @@ -20,67 +20,73 @@ package org.apache.bifromq.basekv.store.range; import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_CLUSTER_CONFIG_BYTES; +import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_LAST_APPLIED_INDEX_BYTES; import static 
org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_RANGE_BOUND_BYTES; import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_STATE_BYTES; import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_VER_BYTES; -import org.apache.bifromq.basekv.localengine.IKVSpaceMetadataUpdatable; -import org.apache.bifromq.basekv.localengine.IKVSpaceMetadataWriter; +import com.google.protobuf.ByteString; +import org.apache.bifromq.basekv.localengine.IRestoreSession; import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import com.google.protobuf.ByteString; -import java.util.Optional; - -public class KVRangeMetadataWriter extends AbstractKVRangeMetadataUpdatable - implements IKVRangeMetadataWriter { - private final IKVSpaceMetadataWriter keyRangeMetadataWriter; +import org.apache.bifromq.basekv.store.util.KVUtil; - KVRangeMetadataWriter(KVRangeId id, IKVSpaceMetadataWriter keyRangeMetadataWriter) { - super(id, keyRangeMetadataWriter); - this.keyRangeMetadataWriter = keyRangeMetadataWriter; - } +class KVRangeRestoreSession implements IKVRangeRestoreSession { + private final IRestoreSession restoreSession; - @Override - protected IKVSpaceMetadataUpdatable keyRangeWriter() { - return keyRangeMetadataWriter; + KVRangeRestoreSession(IRestoreSession restoreSession) { + this.restoreSession = restoreSession; } @Override public void done() { - keyRangeMetadataWriter.done(); + restoreSession.done(); } @Override public void abort() { - keyRangeMetadataWriter.abort(); + restoreSession.abort(); } @Override public int count() { - return keyRangeMetadataWriter.count(); + return restoreSession.count(); + } + + @Override + public IKVRangeRestoreSession ver(long ver) { + restoreSession.metadata(METADATA_VER_BYTES, KVUtil.toByteStringNativeOrder(ver)); + return this; + } + + @Override + public 
IKVRangeRestoreSession lastAppliedIndex(long lastAppliedIndex) { + restoreSession.metadata(METADATA_LAST_APPLIED_INDEX_BYTES, KVUtil.toByteString(lastAppliedIndex)); + return this; } @Override - public long version() { - Optional verBytes = keyRangeMetadataWriter.metadata(METADATA_VER_BYTES); - return version(verBytes.orElse(null)); + public IKVRangeRestoreSession boundary(Boundary boundary) { + restoreSession.metadata(METADATA_RANGE_BOUND_BYTES, boundary.toByteString()); + return this; } @Override - public State state() { - Optional stateData = keyRangeMetadataWriter.metadata(METADATA_STATE_BYTES); - return state(stateData.orElse(null)); + public IKVRangeRestoreSession state(State state) { + restoreSession.metadata(METADATA_STATE_BYTES, state.toByteString()); + return this; } @Override - public Boundary boundary() { - return boundary(keyRangeMetadataWriter.metadata(METADATA_RANGE_BOUND_BYTES).orElse(null)); + public IKVRangeRestoreSession clusterConfig(ClusterConfig clusterConfig) { + restoreSession.metadata(METADATA_CLUSTER_CONFIG_BYTES, clusterConfig.toByteString()); + return this; } @Override - public ClusterConfig clusterConfig() { - return clusterConfig(keyRangeMetadataWriter.metadata(METADATA_CLUSTER_CONFIG_BYTES).orElse(null)); + public IKVRangeRestoreSession put(ByteString key, ByteString value) { + restoreSession.put(key, value); + return this; } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeRestorer.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeRestorer.java index aa663dcfa..113536611 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeRestorer.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeRestorer.java @@ -19,21 +19,14 @@ package org.apache.bifromq.basekv.store.range; -import io.reactivex.rxjava3.annotations.NonNull; -import 
io.reactivex.rxjava3.observers.DisposableObserver; -import io.reactivex.rxjava3.schedulers.Schedulers; import java.util.Objects; import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; -import org.apache.bifromq.basekv.proto.KVPair; import org.apache.bifromq.basekv.proto.KVRangeMessage; import org.apache.bifromq.basekv.proto.KVRangeSnapshot; -import org.apache.bifromq.basekv.proto.SaveSnapshotDataReply; -import org.apache.bifromq.basekv.proto.SaveSnapshotDataRequest; import org.apache.bifromq.basekv.proto.SnapshotSyncRequest; import org.apache.bifromq.basekv.store.exception.KVRangeStoreException; import org.apache.bifromq.logger.MDCLogger; @@ -46,7 +39,6 @@ class KVRangeRestorer { private final IKVRangeMetricManager metricManager; private final Executor executor; private final int idleTimeSec; - private final AdaptiveWriteBudget adaptiveWriteBudget; private final AtomicReference currentSession = new AtomicReference<>(); KVRangeRestorer(KVRangeSnapshot startSnapshot, @@ -61,7 +53,6 @@ class KVRangeRestorer { this.metricManager = metricManager; this.executor = executor; this.idleTimeSec = idleTimeSec; - this.adaptiveWriteBudget = new AdaptiveWriteBudget(); this.log = MDCLogger.getLogger(KVRangeRestorer.class, tags); RestoreSession initialSession = new RestoreSession(startSnapshot, null); initialSession.doneFuture.complete(null); @@ -92,110 +83,38 @@ public CompletableFuture restoreFrom(String leader, KVRangeSnapshot rangeS } CompletableFuture onDone = session.doneFuture; long startNanos = System.nanoTime(); - AtomicLong totalEntries = new AtomicLong(); - AtomicLong totalBytes = new AtomicLong(); try { - IKVReseter restorer = range.toReseter(rangeSnapshot); - log.info("Restoring from snapshot: session={}, leader={} \n{}", session.id, session.leader, rangeSnapshot); - 
DisposableObserver observer = messenger.receive() - .filter(m -> m.hasSaveSnapshotDataRequest() - && m.getSaveSnapshotDataRequest().getSessionId().equals(session.id)) - .timeout(idleTimeSec, TimeUnit.SECONDS) - .observeOn(Schedulers.from(executor)) - .subscribeWith(new DisposableObserver() { - @Override - public void onNext(@NonNull KVRangeMessage m) { - SaveSnapshotDataRequest request = m.getSaveSnapshotDataRequest(); - try { - switch (request.getFlag()) { - case More, End -> { - int bytes = 0; - int entries = 0; - for (KVPair kv : request.getKvList()) { - if (session.entries == 0 && session.bytes == 0) { - session.batchStartNanos = System.nanoTime(); - } - bytes += kv.getKey().size(); - bytes += kv.getValue().size(); - entries++; - restorer.put(kv.getKey(), kv.getValue()); - session.entries++; - session.bytes += kv.getKey().size() + kv.getValue().size(); - if (shouldRotate(session)) { - flushSegment(restorer, session); - } - } - if (request.getFlag() == SaveSnapshotDataRequest.Flag.More) { - if (shouldRotate(session)) { - flushSegment(restorer, session); - } - } - metricManager.reportRestore(bytes); - totalEntries.addAndGet(entries); - totalBytes.addAndGet(bytes); - if (request.getFlag() == SaveSnapshotDataRequest.Flag.End) { - flushSegment(restorer, session); - if (!onDone.isCancelled()) { - restorer.done(); - dispose(); - onDone.complete(null); - log.info( - "Restored from snapshot: session={}, leader={}, entries={}, bytes={}, cost={}ms", - session.id, session.leader, totalEntries.get(), totalBytes.get(), - TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos)); - } else { - restorer.abort(); - dispose(); - log.info("Snapshot restore canceled: session={}, leader={}", - session.id, session.leader); - } - } - messenger.send(KVRangeMessage.newBuilder() - .setRangeId(range.id()) - .setHostStoreId(m.getHostStoreId()) - .setSaveSnapshotDataReply(SaveSnapshotDataReply.newBuilder() - .setReqId(request.getReqId()) - .setSessionId(request.getSessionId()) - 
.setResult(SaveSnapshotDataReply.Result.OK) - .build()) - .build()); - } - default -> throw new KVRangeStoreException("Snapshot dump failed"); - } - } catch (Throwable t) { - log.error("Snapshot restored failed: session={}", session.id, t); - onError(t); - messenger.send(KVRangeMessage.newBuilder() - .setRangeId(range.id()) - .setHostStoreId(m.getHostStoreId()) - .setSaveSnapshotDataReply(SaveSnapshotDataReply.newBuilder() - .setReqId(request.getReqId()) - .setSessionId(request.getSessionId()) - .setResult(SaveSnapshotDataReply.Result.Error) - .build()) - .build()); - } - } - - @Override - public void onError(@NonNull Throwable e) { - restorer.abort(); - onDone.completeExceptionally(e); - dispose(); - } - - @Override - public void onComplete() { - - } - }); + IKVRangeRestoreSession restoreSession = range.startRestore(rangeSnapshot, (count, bytes) -> + log.info("Received snapshot data: session={}, leader={}, entries={}, bytes={}", + session.id, leader, count, bytes)); + log.info("Restoring from snapshot: session={}, leader={} \n{}", session.id, leader, rangeSnapshot); + IKVRangeSnapshotReceiver receiver = new KVRangeSnapshotReceiver(session.id, rangeSnapshot.getId(), leader, + messenger, metricManager, executor, idleTimeSec, log); + CompletableFuture receiveFuture = receiver.start(restoreSession::put); onDone.whenComplete((v, e) -> { if (onDone.isCancelled()) { - observer.dispose(); - restorer.abort(); + restoreSession.abort(); + receiveFuture.cancel(true); } }); - log.info("Send snapshot sync request: leader={}", session.leader); + receiveFuture.whenCompleteAsync((result, e) -> { + if (e != null) { + restoreSession.abort(); + onDone.completeExceptionally(new KVRangeStoreException("Snapshot restore failed", e)); + return; + } + if (result.code() == IKVRangeSnapshotReceiver.Code.DONE) { + restoreSession.done(); + onDone.complete(null); + log.info("Restored from snapshot: session={}, leader={}, entries={}, bytes={}, cost={}ms", + session.id, session.leader, 
result.totalEntries(), result.totalBytes(), + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos)); + } else { + restoreSession.abort(); + onDone.completeExceptionally(new KVRangeStoreException("Snapshot restore failed: " + result)); + } + }, executor); + log.info("Send snapshot sync request to {} {}", leader, !onDone.isDone()); if (!onDone.isDone()) { messenger.send(KVRangeMessage.newBuilder() .setRangeId(range.id()) @@ -212,39 +131,15 @@ public void onComplete() { return onDone; } - private boolean shouldRotate(RestoreSession session) { - return adaptiveWriteBudget.shouldFlush(session.entries, session.bytes); - } - - private void flushSegment(IKVReseter restorer, RestoreSession session) { - if (session.entries > 0 || session.bytes > 0) { - log.info("Flush snapshot data: sessionId={}, entries={}, bytes={}, leader={}", session.id, - session.entries, session.bytes, session.leader); - long entries = session.entries; - long bytes = session.bytes; - long batchStart = session.batchStartNanos > 0 ? 
session.batchStartNanos : System.nanoTime(); - restorer.flush(); - long latencyMillis = Math.max(1L, TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - batchStart)); - adaptiveWriteBudget.recordFlush(entries, bytes, latencyMillis); - session.entries = 0; - session.bytes = 0; - session.batchStartNanos = -1; - } - } - private static class RestoreSession { final String id = UUID.randomUUID().toString(); final KVRangeSnapshot snapshot; final CompletableFuture doneFuture = new CompletableFuture<>(); final String leader; - long entries = 0; - long bytes = 0; - long batchStartNanos = -1; private RestoreSession(KVRangeSnapshot snapshot, String leader) { this.snapshot = snapshot; this.leader = leader; } } - } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeSnapshotReceiver.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeSnapshotReceiver.java new file mode 100644 index 000000000..37c8b3165 --- /dev/null +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeSnapshotReceiver.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.store.range; + +import static org.apache.bifromq.basekv.store.range.IKVRangeSnapshotReceiver.Code.DONE; +import static org.apache.bifromq.basekv.store.range.IKVRangeSnapshotReceiver.Code.ERROR; +import static org.apache.bifromq.basekv.store.range.IKVRangeSnapshotReceiver.Code.NOT_FOUND; +import static org.apache.bifromq.basekv.store.range.IKVRangeSnapshotReceiver.Code.TIME_OUT; + +import io.reactivex.rxjava3.annotations.NonNull; +import io.reactivex.rxjava3.observers.DisposableObserver; +import io.reactivex.rxjava3.schedulers.Schedulers; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.bifromq.base.util.CascadeCancelCompletableFuture; +import org.apache.bifromq.basekv.proto.KVPair; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.KVRangeMessage; +import org.apache.bifromq.basekv.proto.SaveSnapshotDataReply; +import org.apache.bifromq.basekv.proto.SaveSnapshotDataRequest; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; +import org.slf4j.Logger; + +class KVRangeSnapshotReceiver implements IKVRangeSnapshotReceiver { + private final Logger log; + private final String sessionId; + private final IKVRangeMessenger messenger; + private final IKVRangeMetricManager metricManager; + private final Executor executor; + private final int idleTimeSec; + private final KVRangeId sourceRangeId; + private final String sourceStoreId; + + public KVRangeSnapshotReceiver(String sessionId, + KVRangeId sourceRangeId, + String sourceStoreId, + IKVRangeMessenger messenger, + IKVRangeMetricManager metricManager, + Executor executor, + int idleTimeSec, + Logger log) { + this.sessionId = sessionId; + this.sourceRangeId = sourceRangeId; + this.sourceStoreId = sourceStoreId; + this.messenger = messenger; + 
this.metricManager = metricManager; + this.executor = executor; + this.idleTimeSec = idleTimeSec; + this.log = log; + } + + @Override + public CompletableFuture start(ReceiveListener listener) { + CompletableFuture onDone = new CompletableFuture<>(); + AtomicLong totalEntries = new AtomicLong(); + AtomicLong totalBytes = new AtomicLong(); + try { + DisposableObserver observer = messenger.receive() + .filter(m -> m.hasSaveSnapshotDataRequest() + && m.getHostStoreId().equals(sourceStoreId) + && m.getSaveSnapshotDataRequest().getSessionId().equals(sessionId)) + .timeout(idleTimeSec, TimeUnit.SECONDS, Schedulers.from(executor)) + .observeOn(Schedulers.from(executor)) + .subscribeWith(new DisposableObserver() { + @Override + public void onNext(@NonNull KVRangeMessage m) { + SaveSnapshotDataRequest request = m.getSaveSnapshotDataRequest(); + try { + switch (request.getFlag()) { + case More, End -> { + int thisBytes = 0; + int thisEntries = 0; + for (KVPair kv : request.getKvList()) { + thisBytes += kv.getKey().size(); + thisBytes += kv.getValue().size(); + thisEntries++; + listener.onReceive(kv.getKey(), kv.getValue()); + } + metricManager.reportRestore(thisBytes); + totalEntries.addAndGet(thisEntries); + totalBytes.addAndGet(thisBytes); + if (request.getFlag() == SaveSnapshotDataRequest.Flag.End) { + if (!onDone.isCancelled()) { + dispose(); + onDone.complete(new Result(DONE, totalEntries.get(), totalBytes.get())); + log.info("Finish data receiving: rangeId={}, storeId={}, session={}", + KVRangeIdUtil.toString(sourceRangeId), sourceStoreId, sessionId); + } else { + dispose(); + log.info("Receiver canceled: session={}", sessionId); + } + } + log.debug("Send reply: rangeId={}, storeId={}, session={}", + KVRangeIdUtil.toString(sourceRangeId), sourceStoreId, sessionId); + messenger.send(KVRangeMessage.newBuilder() + .setRangeId(sourceRangeId) + .setHostStoreId(sourceStoreId) + .setSaveSnapshotDataReply(SaveSnapshotDataReply.newBuilder() + .setReqId(request.getReqId()) + 
.setSessionId(request.getSessionId()) + .setResult(SaveSnapshotDataReply.Result.OK) + .build()) + .build()); + } + case NotFound -> { + onDone.complete(new Result(NOT_FOUND, 0, 0)); + dispose(); + } + default -> { + log.debug("Failed to receive data: rangeId={}, storeId={}, session={}", + KVRangeIdUtil.toString(sourceRangeId), sourceStoreId, sessionId); + onDone.complete(new Result(ERROR, 0, 0)); + dispose(); + } + } + } catch (Throwable t) { + log.error("Snapshot restored failed: session={}", sessionId, t); + onError(t); + messenger.send(KVRangeMessage.newBuilder() + .setRangeId(sourceRangeId) + .setHostStoreId(sourceStoreId) + .setSaveSnapshotDataReply(SaveSnapshotDataReply.newBuilder() + .setSessionId(sessionId) + .setResult(SaveSnapshotDataReply.Result.Error) + .build()) + .build()); + } + } + + @Override + public void onError(@NonNull Throwable e) { + log.error("Receiving data error: rangeId={}, storeId={}, session={}", + KVRangeIdUtil.toString(sourceRangeId), sourceStoreId, sessionId, e); + if (e instanceof TimeoutException) { + onDone.complete(new Result(TIME_OUT, 0, 0)); + } else { + onDone.complete(new Result(ERROR, 0, 0)); + } + dispose(); + } + + @Override + public void onComplete() { + + } + }); + onDone.whenCompleteAsync((v, e) -> { + if (onDone.isCancelled()) { + observer.dispose(); + messenger.send(KVRangeMessage.newBuilder() + .setRangeId(sourceRangeId) + .setHostStoreId(sourceStoreId) + .setSaveSnapshotDataReply(SaveSnapshotDataReply.newBuilder() + .setSessionId(sessionId) + .setResult(SaveSnapshotDataReply.Result.NoSessionFound) + .build()) + .build()); + } + }, executor); + } catch (Throwable t) { + log.error("Unexpected error", t); + onDone.completeExceptionally(t); + } + return CascadeCancelCompletableFuture.fromRoot(onDone); + } +} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeStatsCollector.java 
b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeStatsCollector.java index e7e8a6b9c..8c6e8a9c9 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeStatsCollector.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeStatsCollector.java @@ -14,27 +14,23 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range; -import org.apache.bifromq.basekv.store.api.IKVRangeReader; -import org.apache.bifromq.basekv.store.stats.StatsCollector; -import org.apache.bifromq.basekv.store.wal.IKVRangeWAL; import java.time.Duration; import java.util.Map; import java.util.concurrent.Executor; +import org.apache.bifromq.basekv.store.stats.StatsCollector; +import org.apache.bifromq.basekv.store.wal.IKVRangeWAL; -final class KVRangeStatsCollector extends StatsCollector { - private final IKVRangeReader reader; +class KVRangeStatsCollector extends StatsCollector { + private final IKVRange reader; private final IKVRangeWAL wal; - public KVRangeStatsCollector(IKVRangeReader rangeState, - IKVRangeWAL wal, - Duration interval, - Executor executor) { + public KVRangeStatsCollector(IKVRange rangeState, IKVRangeWAL wal, Duration interval, Executor executor) { super(interval, executor); this.reader = rangeState; this.wal = wal; diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeWriter.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeWriter.java index 50b941d07..be2b45094 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeWriter.java +++ 
b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeWriter.java @@ -14,91 +14,150 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range; import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_CLUSTER_CONFIG_BYTES; +import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_LAST_APPLIED_INDEX_BYTES; import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_RANGE_BOUND_BYTES; import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_STATE_BYTES; import static org.apache.bifromq.basekv.store.range.KVRangeKeys.METADATA_VER_BYTES; -import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; +import com.google.protobuf.ByteString; +import java.util.HashSet; +import java.util.Set; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVSpaceMigratableWriter; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.KVRangeSnapshot; import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.store.api.IKVWriter; +import org.apache.bifromq.basekv.store.util.KVUtil; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; -import com.google.protobuf.ByteString; -import java.util.Optional; -public class KVRangeWriter extends AbstractKVRangeMetadataUpdatable - implements IKVRangeWriter { - private final IKVSpaceWriter spaceWriter; +class KVRangeWriter implements IKVRangeWriter { + private final KVRangeId id; + private final ICPableKVSpace space; + private final IKVSpaceMigratableWriter spaceWriter; + private final Set activeRestoreSessions = new 
HashSet<>(); - public KVRangeWriter(KVRangeId id, IKVSpaceWriter spaceWriter) { - super(id, spaceWriter); - this.spaceWriter = spaceWriter; + KVRangeWriter(KVRangeId id, ICPableKVSpace space) { + this.id = id; + this.space = space; + this.spaceWriter = space.toWriter(); } @Override - protected IKVSpaceWriter keyRangeWriter() { - return spaceWriter; + public KVRangeId id() { + return id; } @Override - public IKVRangeMetadataWriter migrateTo(KVRangeId targetRangeId, Boundary boundary) { - return new KVRangeMetadataWriter(targetRangeId, - spaceWriter.migrateTo(KVRangeIdUtil.toString(targetRangeId), boundary)); + public final KVRangeWriter ver(long ver) { + spaceWriter.metadata(METADATA_VER_BYTES, KVUtil.toByteStringNativeOrder(ver)); + return this; } @Override - public IKVRangeMetadataWriter migrateFrom(KVRangeId fromRangeId, Boundary boundary) { - return new KVRangeMetadataWriter(fromRangeId, - spaceWriter.migrateFrom(KVRangeIdUtil.toString(fromRangeId), boundary)); + public final KVRangeWriter lastAppliedIndex(long lastAppliedIndex) { + spaceWriter.metadata(METADATA_LAST_APPLIED_INDEX_BYTES, KVUtil.toByteString(lastAppliedIndex)); + return this; } @Override - public IKVWriter kvWriter() { - return new KVWriter(spaceWriter); + public final KVRangeWriter boundary(Boundary boundary) { + spaceWriter.metadata(METADATA_RANGE_BOUND_BYTES, boundary.toByteString()); + return this; } @Override - public void abort() { - keyRangeWriter().abort(); + public final KVRangeWriter state(State state) { + spaceWriter.metadata(METADATA_STATE_BYTES, state.toByteString()); + return this; } @Override - public int count() { - return keyRangeWriter().count(); + public final KVRangeWriter clusterConfig(ClusterConfig clusterConfig) { + spaceWriter.metadata(METADATA_CLUSTER_CONFIG_BYTES, clusterConfig.toByteString()); + return this; } @Override - public void done() { - keyRangeWriter().done(); + public void migrateTo(KVRangeId targetRangeId, KVRangeSnapshot snapshot) { + 
activeRestoreSessions.add(new KVRangeRestoreSession( + spaceWriter.migrateTo(KVRangeIdUtil.toString(targetRangeId), snapshot.getBoundary())) + .ver(snapshot.getVer()) + .lastAppliedIndex(snapshot.getLastAppliedIndex()) + .boundary(snapshot.getBoundary()) + .state(snapshot.getState()) + .clusterConfig(snapshot.getClusterConfig())); } @Override - public long version() { - Optional verBytes = spaceWriter.metadata(METADATA_VER_BYTES); - return version(verBytes.orElse(null)); + public Migrater startMerging(MigrationProgressListener progressListener) { + IKVRangeRestoreSession restoreSession = new KVRangeRestoreSession( + space.startReceiving(progressListener::onProgress)); + activeRestoreSessions.add(restoreSession); + return new Migrater() { + @Override + public Migrater ver(long ver) { + restoreSession.ver(ver); + return this; + } + + @Override + public Migrater state(State state) { + restoreSession.state(state); + return this; + } + + @Override + public Migrater boundary(Boundary boundary) { + restoreSession.boundary(boundary); + return this; + } + + @Override + public void put(ByteString key, ByteString value) { + restoreSession.put(key, value); + } + + @Override + public void abort() { + restoreSession.abort(); + } + }; } @Override - public State state() { - Optional stateData = spaceWriter.metadata(METADATA_STATE_BYTES); - return state(stateData.orElse(null)); + public IKVWriter kvWriter() { + return new KVWriter(spaceWriter); } @Override - public Boundary boundary() { - return boundary(spaceWriter.metadata(METADATA_RANGE_BOUND_BYTES).orElse(null)); + public void abort() { + spaceWriter.abort(); + for (IKVRangeRestoreSession session : activeRestoreSessions) { + session.abort(); + } + activeRestoreSessions.clear(); } @Override - public ClusterConfig clusterConfig() { - return clusterConfig(spaceWriter.metadata(METADATA_CLUSTER_CONFIG_BYTES).orElse(null)); + public int count() { + return spaceWriter.count(); + } + + @Override + public void done() { + 
spaceWriter.done(); + for (IKVRangeRestoreSession session : activeRestoreSessions) { + session.done(); + } + activeRestoreSessions.clear(); } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVReader.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVReader.java deleted file mode 100644 index 52a8b6d92..000000000 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVReader.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.store.range; - -import static org.apache.bifromq.basekv.utils.BoundaryUtil.inRange; - -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.IKVSpaceReader; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; -import org.apache.bifromq.basekv.store.api.IKVIterator; -import org.apache.bifromq.basekv.store.api.IKVRangeReader; -import com.google.protobuf.ByteString; -import java.util.Optional; - -public class KVReader implements IKVCloseableReader { - private final IKVSpaceReader kvSpace; - private final IKVRangeReader kvRangeReader; - private volatile IKVSpaceIterator kvSpaceIterator; - - KVReader(IKVSpaceReader kvSpace, IKVRangeReader reader) { - this.kvSpace = kvSpace; - this.kvRangeReader = reader; - } - - @Override - public Boundary boundary() { - return kvRangeReader.boundary(); - } - - @Override - public long size(Boundary boundary) { - assert inRange(boundary, boundary()); - return kvRangeReader.size(boundary); - } - - @Override - public boolean exist(ByteString key) { - assert inRange(key, boundary()); - return kvSpace.exist(key); - } - - @Override - public Optional get(ByteString key) { - assert inRange(key, boundary()); - return kvSpace.get(key); - } - - @Override - public IKVIterator iterator() { - return new KVIterator(getKvSpaceIterator()); - } - - @Override - public void refresh() { - getKvSpaceIterator().refresh(); - } - - private IKVSpaceIterator getKvSpaceIterator() { - if (kvSpaceIterator == null) { - synchronized (this) { - if (kvSpaceIterator == null) { - this.kvSpaceIterator = kvSpace.newIterator(); - } - } - } - return kvSpaceIterator; - } - - @Override - public void close() { - if (kvSpaceIterator != null) { - kvSpaceIterator.close(); - } - } -} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVWriter.java 
b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVWriter.java index b41a731b2..6b10187c8 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVWriter.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVWriter.java @@ -14,20 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range; +import com.google.protobuf.ByteString; import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.store.api.IKVWriter; -import com.google.protobuf.ByteString; -public class KVWriter implements IKVWriter { +class KVWriter implements IKVWriter { private final IKVSpaceWriter writer; - public KVWriter(IKVSpaceWriter writer) { + KVWriter(IKVSpaceWriter writer) { this.writer = writer; } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVIterator.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVIterator.java index d710d2f7e..1495ee543 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVIterator.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVIterator.java @@ -19,14 +19,14 @@ package org.apache.bifromq.basekv.store.range; -import org.apache.bifromq.basekv.store.api.IKVIterator; import com.google.protobuf.ByteString; +import org.apache.bifromq.basekv.store.api.IKVIterator; -public class LoadRecordableKVIterator implements IKVIterator { +class LoadRecordableKVIterator implements IKVIterator { private final IKVIterator delegate; private final 
IKVLoadRecorder recorder; - public LoadRecordableKVIterator(IKVIterator delegate, IKVLoadRecorder recorder) { + LoadRecordableKVIterator(IKVIterator delegate, IKVLoadRecorder recorder) { this.delegate = delegate; this.recorder = recorder; } @@ -93,4 +93,9 @@ public void seekForPrev(ByteString key) { delegate.seekForPrev(key); recorder.record(key, System.nanoTime() - start); } + + @Override + public void close() { + delegate.close(); + } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVRangeWriter.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVRangeWriter.java index 1665d5784..c46dbce06 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVRangeWriter.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVRangeWriter.java @@ -19,15 +19,15 @@ package org.apache.bifromq.basekv.store.range; -import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.store.api.IKVWriter; -public class LoadRecordableKVRangeWriter extends KVRangeWriter { +class LoadRecordableKVRangeWriter extends KVRangeWriter { private final IKVLoadRecorder recorder; - public LoadRecordableKVRangeWriter(KVRangeId id, IKVSpaceWriter spaceWriter, IKVLoadRecorder recorder) { - super(id, spaceWriter); + LoadRecordableKVRangeWriter(KVRangeId id, ICPableKVSpace space, IKVLoadRecorder recorder) { + super(id, space); this.recorder = recorder; } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVReader.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVReader.java index 23956399e..918d0c903 100644 --- 
a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVReader.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVReader.java @@ -14,31 +14,53 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.store.api.IKVIterator; -import org.apache.bifromq.basekv.store.api.IKVReader; import com.google.protobuf.ByteString; import java.util.Optional; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.proto.State; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.store.api.IKVIterator; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; -class LoadRecordableKVReader implements IKVReader { - private final IKVReader delegate; +class LoadRecordableKVReader implements IKVRangeRefreshableReader { + private final IKVRangeRefreshableReader delegate; private final IKVLoadRecorder recorder; - LoadRecordableKVReader(IKVReader delegate, IKVLoadRecorder recorder) { + LoadRecordableKVReader(IKVRangeRefreshableReader delegate, IKVLoadRecorder recorder) { this.delegate = delegate; this.recorder = recorder; } + @Override + public long version() { + return delegate.version(); + } + + @Override + public State state() { + return delegate.state(); + } + + @Override + public long lastAppliedIndex() { + return delegate.lastAppliedIndex(); + } + @Override public Boundary boundary() { return delegate.boundary(); } + @Override + public ClusterConfig clusterConfig() { + return delegate.clusterConfig(); + } + @Override public long size(Boundary boundary) { return delegate.size(boundary); @@ -65,6 +87,16 @@ public 
IKVIterator iterator() { return new LoadRecordableKVIterator(delegate.iterator(), recorder); } + @Override + public IKVIterator iterator(Boundary boundary) { + return new LoadRecordableKVIterator(delegate.iterator(boundary), recorder); + } + + @Override + public void close() { + delegate.close(); + } + @Override public void refresh() { long start = System.nanoTime(); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVWriter.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVWriter.java index c7ca931fd..3a1d65762 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVWriter.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/LoadRecordableKVWriter.java @@ -19,9 +19,9 @@ package org.apache.bifromq.basekv.store.range; +import com.google.protobuf.ByteString; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.store.api.IKVWriter; -import com.google.protobuf.ByteString; class LoadRecordableKVWriter implements IKVWriter { private final IKVWriter delegate; diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/SnapshotBandwidthGovernor.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/SnapshotBandwidthGovernor.java index 3a953e814..8aaa05c46 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/SnapshotBandwidthGovernor.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/SnapshotBandwidthGovernor.java @@ -21,7 +21,7 @@ import com.google.common.util.concurrent.RateLimiter; -final class SnapshotBandwidthGovernor { +class SnapshotBandwidthGovernor { private final RateLimiter rateLimiter; SnapshotBandwidthGovernor(long bytesPerSec) { diff --git 
a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/KVLoadBasedSplitHinter.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/KVLoadBasedSplitHinter.java index 2eccabc7d..16dd994fc 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/KVLoadBasedSplitHinter.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/KVLoadBasedSplitHinter.java @@ -14,15 +14,11 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range.hinter; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.proto.SplitHint; -import org.apache.bifromq.basekv.store.api.IKVLoadRecord; -import org.apache.bifromq.basekv.store.api.IKVRangeSplitHinter; import com.google.common.base.Preconditions; import com.google.protobuf.ByteString; import io.micrometer.core.instrument.Gauge; @@ -30,10 +26,10 @@ import java.time.Duration; import java.util.Map; import java.util.NavigableMap; -import java.util.Optional; import java.util.concurrent.ConcurrentSkipListMap; -import java.util.function.Function; import java.util.function.Supplier; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.proto.SplitHint; public abstract class KVLoadBasedSplitHinter implements IKVRangeSplitHinter { public static final String LOAD_TYPE_IO_DENSITY = "ioDensity"; @@ -43,20 +39,15 @@ public abstract class KVLoadBasedSplitHinter implements IKVRangeSplitHinter { private final long windowSizeNanos; private final NavigableMap trackedKeySlots = new ConcurrentSkipListMap<>(); private final NavigableMap recentLoadHints = new ConcurrentSkipListMap<>(); - private final Function> toSplitKey; private final 
Gauge ioDensityGuage; private final Gauge ioLatencyNanosGauge; private final Gauge avgLatencyNanosGauge; private volatile SplitHint latestHint = SplitHint.getDefaultInstance(); - public KVLoadBasedSplitHinter(Supplier nanoSource, - Duration windowSize, - Function> toSplitKey, - String... tags) { + public KVLoadBasedSplitHinter(Supplier nanoSource, Duration windowSize, String... tags) { Preconditions.checkArgument(!windowSize.isNegative(), "Window size must be positive"); this.nanoSource = nanoSource; this.windowSizeNanos = windowSize.toNanos(); - this.toSplitKey = toSplitKey; ioDensityGuage = Gauge.builder("basekv.load.est.iodensity", () -> latestHint.getLoadOrDefault(LOAD_TYPE_IO_DENSITY, 0)) .tags(tags) @@ -101,15 +92,15 @@ protected void onRecord(IKVLoadRecord kvLoadRecord) { if (mySlot < currentSlot) { // cross window slot long slotBegin = currentSlot * windowSizeNanos; - trackedKeySlots.computeIfAbsent(currentSlot, k -> new LoadRecordWindow(toSplitKey)) + trackedKeySlots.computeIfAbsent(currentSlot, k -> new LoadRecordWindow()) .record(loadDistribution, kvIOs, kvNanos, now - slotBegin); if (mySlot + 1 < currentSlot) { trackedKeySlots.computeIfAbsent(currentSlot - 1, - k -> new LoadRecordWindow(toSplitKey)).record(loadDistribution, kvIOs, kvNanos, windowSizeNanos); + k -> new LoadRecordWindow()).record(loadDistribution, kvIOs, kvNanos, windowSizeNanos); } else { trackedKeySlots.computeIfAbsent(currentSlot - 1, - k -> new LoadRecordWindow(toSplitKey)) + k -> new LoadRecordWindow()) .record(loadDistribution, kvIOs, kvNanos, slotBegin - startNanos); } // re-estimate @@ -117,7 +108,7 @@ protected void onRecord(IKVLoadRecord kvLoadRecord) { } else { // still in same window slot trackedKeySlots.computeIfAbsent(currentSlot, - k -> new LoadRecordWindow(toSplitKey)).record(loadDistribution, kvIOs, kvNanos, now - startNanos); + k -> new LoadRecordWindow()).record(loadDistribution, kvIOs, kvNanos, now - startNanos); } } diff --git 
a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/LoadRecordWindow.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/LoadRecordWindow.java index f8dbda227..753fbf2c7 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/LoadRecordWindow.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/LoadRecordWindow.java @@ -29,30 +29,16 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Function; final class LoadRecordWindow { - private final Function> toSplitKey; private final AtomicInteger records = new AtomicInteger(); private final AtomicInteger totalKVIOs = new AtomicInteger(); private final AtomicLong totalKVIONanos = new AtomicLong(); private final AtomicLong totalLatency = new AtomicLong(); private final Map loadDistribution = new ConcurrentHashMap<>(); - public LoadRecordWindow() { - this(Optional::of); - } - - LoadRecordWindow(Function> toSplitKey) { - this.toSplitKey = toSplitKey; - } + LoadRecordWindow() { - LoadRecordWindow(LoadRecordWindow other) { - this.toSplitKey = other.toSplitKey; - this.records.set(other.records.get()); - this.totalKVIOs.set(other.totalKVIOs.get()); - this.totalKVIONanos.set(other.totalKVIONanos.get()); - this.totalLatency.set(other.totalLatency.get()); } void record(Map keyLoads, int kvIOs, long kvIOTimeNanos, long latencyNanos) { @@ -89,22 +75,12 @@ public Optional estimateSplitKey() { totalKeyIONanos += entry.getValue().get(); } long halfTotal = totalKeyIONanos / 2; - int attempt = 0; for (Map.Entry e : slotDistro.entrySet()) { loadSum += e.getValue().get(); if (loadSum >= halfTotal) { - Optional splitKey = toSplitKey.apply(e.getKey()); - if (splitKey.isPresent()) { - return splitKey; - } - attempt++; - if (attempt < 5) { - attempt++; - } 
else { - return Optional.empty(); - } + return Optional.of(e.getKey()); } } return Optional.empty(); } -} \ No newline at end of file +} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/MutationKVLoadBasedSplitHinter.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/MutationKVLoadBasedSplitHinter.java index 13ad5b975..fca3014c7 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/MutationKVLoadBasedSplitHinter.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/MutationKVLoadBasedSplitHinter.java @@ -14,26 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range.hinter; -import org.apache.bifromq.basekv.store.api.IKVLoadRecord; +import java.time.Duration; import org.apache.bifromq.basekv.store.proto.ROCoProcInput; import org.apache.bifromq.basekv.store.proto.RWCoProcInput; -import com.google.protobuf.ByteString; -import java.time.Duration; -import java.util.Optional; -import java.util.function.Function; public class MutationKVLoadBasedSplitHinter extends KVLoadBasedSplitHinter { public static final String TYPE = "kv_io_mutation"; - public MutationKVLoadBasedSplitHinter(Duration windowSizeSeconds, - Function> toSplitKey, - String... tags) { - super(System::nanoTime, windowSizeSeconds, toSplitKey, tags); + public MutationKVLoadBasedSplitHinter(Duration windowSizeSeconds, String... 
tags) { + super(System::nanoTime, windowSizeSeconds, tags); } diff --git a/bifromq-sysprops/src/main/java/org/apache/bifromq/sysprops/props/DistWorkerLoadEstimationWindowSeconds.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/MutationKVLoadBasedSplitHinterFactory.java similarity index 51% rename from bifromq-sysprops/src/main/java/org/apache/bifromq/sysprops/props/DistWorkerLoadEstimationWindowSeconds.java rename to base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/MutationKVLoadBasedSplitHinterFactory.java index 6f511f93f..de71bec7f 100644 --- a/bifromq-sysprops/src/main/java/org/apache/bifromq/sysprops/props/DistWorkerLoadEstimationWindowSeconds.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/MutationKVLoadBasedSplitHinterFactory.java @@ -17,18 +17,22 @@ * under the License. */ -package org.apache.bifromq.sysprops.props; +package org.apache.bifromq.basekv.store.range.hinter; -import org.apache.bifromq.sysprops.BifroMQSysProp; -import org.apache.bifromq.sysprops.parser.LongParser; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import java.time.Duration; -/** - * The window seconds for load estimation in dist worker. 
- */ -public final class DistWorkerLoadEstimationWindowSeconds extends BifroMQSysProp { - public static final DistWorkerLoadEstimationWindowSeconds INSTANCE = new DistWorkerLoadEstimationWindowSeconds(); +public class MutationKVLoadBasedSplitHinterFactory implements IKVRangeSplitHinterFactory { + private static final String CONF_WINDOW_SECONDS = "windowSeconds"; - private DistWorkerLoadEstimationWindowSeconds() { - super("dist_worker_load_estimation_window_seconds", 5L, LongParser.POSITIVE); + @Override + public IKVRangeSplitHinter create(SplitHinterContext ctx, Struct conf) { + long windowSeconds = 5; + if (conf != null && conf.getFieldsOrDefault(CONF_WINDOW_SECONDS, Value.getDefaultInstance()).hasNumberValue()) { + windowSeconds = (long) conf.getFieldsOrThrow(CONF_WINDOW_SECONDS).getNumberValue(); + } + return new MutationKVLoadBasedSplitHinter(Duration.ofSeconds(windowSeconds), ctx.getTags()); } } + diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/QueryKVLoadBasedSplitHinter.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/QueryKVLoadBasedSplitHinter.java index 206b67272..c251cb427 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/QueryKVLoadBasedSplitHinter.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/QueryKVLoadBasedSplitHinter.java @@ -14,37 +14,25 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store.range.hinter; -import org.apache.bifromq.basekv.store.api.IKVLoadRecord; -import org.apache.bifromq.basekv.store.proto.ROCoProcInput; -import org.apache.bifromq.basekv.store.proto.RWCoProcInput; -import com.google.protobuf.ByteString; import java.time.Duration; -import java.util.Optional; -import java.util.function.Function; import java.util.function.Supplier; +import org.apache.bifromq.basekv.store.proto.ROCoProcInput; +import org.apache.bifromq.basekv.store.proto.RWCoProcInput; public class QueryKVLoadBasedSplitHinter extends KVLoadBasedSplitHinter { public static final String TYPE = "kv_io_query"; public QueryKVLoadBasedSplitHinter(Duration windowSize, String... tags) { - this(windowSize, Optional::of, tags); - } - - public QueryKVLoadBasedSplitHinter(Duration windowSize, - Function> toSplitKey, - String... tags) { - this(System::nanoTime, windowSize, toSplitKey, tags); + this(System::nanoTime, windowSize, tags); } - public QueryKVLoadBasedSplitHinter(Supplier nanoSource, Duration windowSize, - Function> toSplitKey, - String... tags) { - super(nanoSource, windowSize, toSplitKey, tags); + public QueryKVLoadBasedSplitHinter(Supplier nanoSource, Duration windowSize, String... tags) { + super(nanoSource, windowSize, tags); } @Override diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/SplitHinterRegistry.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/SplitHinterRegistry.java new file mode 100644 index 000000000..cd1b916e3 --- /dev/null +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/hinter/SplitHinterRegistry.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basekv.store.range.hinter; + +import com.google.protobuf.Struct; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import org.apache.bifromq.basehookloader.BaseHookLoader; +import org.slf4j.Logger; + +public final class SplitHinterRegistry { + private final List factorySpecs; + + /** + * Load available factories and cache matched ones. + */ + public SplitHinterRegistry(Map factoryConf, Logger log) { + Map loaded = BaseHookLoader.load(IKVRangeSplitHinterFactory.class); + List specs = new ArrayList<>(); + for (Map.Entry entry : factoryConf.entrySet()) { + String fqn = entry.getKey(); + IKVRangeSplitHinterFactory factory = loaded.get(fqn); + if (factory == null) { + log.warn("KVRangeSplitHinterFactory[{}] not found", fqn); + continue; + } + log.info("KVRangeSplitHinterFactory[{}] enabled", fqn); + specs.add(new FactorySpec(factory, entry.getValue())); + } + this.factorySpecs = specs; + } + + /** + * Create hinters by cached factories with incoming context. 
+ */ + public List createHinters(SplitHinterContext context) { + List hinters = new ArrayList<>(); + for (FactorySpec spec : factorySpecs) { + hinters.add(spec.factory.create(context, spec.conf)); + } + return hinters; + } + + private record FactorySpec(IKVRangeSplitHinterFactory factory, Struct conf) { + } +} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/stats/StatsCollector.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/stats/StatsCollector.java index 7f534efbc..e8e353678 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/stats/StatsCollector.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/stats/StatsCollector.java @@ -14,28 +14,26 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store.stats; -import org.apache.bifromq.base.util.AsyncRunner; import com.google.common.collect.Maps; import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.subjects.BehaviorSubject; import java.time.Duration; import java.util.Map; -import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionStage; import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.bifromq.base.util.AsyncRunner; public abstract class StatsCollector implements IStatsCollector { private final Duration interval; private final AsyncRunner executor; private final BehaviorSubject> statsSubject = BehaviorSubject.create(); private final AtomicBoolean closed = new AtomicBoolean(); - private final CompletableFuture closedSignal = new CompletableFuture<>(); private volatile long lastScrapAt = 0; public StatsCollector(Duration interval, Executor executor) { @@ -50,7 +48,7 @@ public final Observable> collect() { @Override public final void tick() { - if (interval.compareTo(Duration.ofNanos(System.nanoTime() - lastScrapAt)) <= 0) { + if (!closed.get() && interval.compareTo(Duration.ofNanos(System.nanoTime() - lastScrapAt)) <= 0) { executor.add(() -> { if (closed.get()) { if (!statsSubject.hasComplete()) { @@ -73,10 +71,9 @@ public final CompletionStage stop() { if (!statsSubject.hasComplete()) { statsSubject.onComplete(); } - closedSignal.complete(null); }); } - return closedSignal; + return executor.awaitDone(); } protected abstract void scrap(Map stats); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/IKVRangeWAL.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/IKVRangeWAL.java index 1347f9580..4ce572982 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/IKVRangeWAL.java +++ 
b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/IKVRangeWAL.java @@ -22,7 +22,6 @@ import com.google.protobuf.ByteString; import com.google.protobuf.InvalidProtocolBufferException; import io.reactivex.rxjava3.core.Observable; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; @@ -33,6 +32,7 @@ import org.apache.bifromq.basekv.proto.KVRangeCommand; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeSnapshot; +import org.apache.bifromq.basekv.raft.ILogEntryIterator; import org.apache.bifromq.basekv.raft.IRaftNode; import org.apache.bifromq.basekv.raft.event.CommitEvent; import org.apache.bifromq.basekv.raft.event.ElectionEvent; @@ -71,7 +71,7 @@ public interface IKVRangeWAL { Observable commitIndex(); - CompletableFuture> retrieveCommitted(long fromIndex, long maxSize); + CompletableFuture retrieveCommitted(long fromIndex, long maxSize); CompletableFuture readIndex(); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/IKVRangeWALSubscription.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/IKVRangeWALSubscription.java index 9dd0aacb9..d7cf1ba6d 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/IKVRangeWALSubscription.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/IKVRangeWALSubscription.java @@ -14,11 +14,13 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store.wal; +import java.util.concurrent.CompletionStage; + public interface IKVRangeWALSubscription { - void stop(); + CompletionStage stop(); } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWAL.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWAL.java index 27705301d..31f80e3b3 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWAL.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWAL.java @@ -26,14 +26,12 @@ import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.subjects.BehaviorSubject; import io.reactivex.rxjava3.subjects.PublishSubject; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; -import java.util.concurrent.atomic.AtomicLong; import java.util.function.Predicate; import lombok.SneakyThrows; import org.apache.bifromq.baseenv.EnvProvider; @@ -41,6 +39,7 @@ import org.apache.bifromq.basekv.proto.KVRangeCommand; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeSnapshot; +import org.apache.bifromq.basekv.raft.ILogEntryIterator; import org.apache.bifromq.basekv.raft.IRaftNode; import org.apache.bifromq.basekv.raft.RaftConfig; import org.apache.bifromq.basekv.raft.RaftNode; @@ -75,7 +74,6 @@ public class KVRangeWAL implements IKVRangeWAL, IRaftNode.ISnapshotInstaller { private final String localId; private final IKVRangeWALStore walStore; private final IRaftNode raftNode; - private final AtomicLong ticks = new AtomicLong(0); private final String[] tags; public KVRangeWAL(String clusterId, @@ -171,7 +169,7 @@ public CompletableFuture restore(KVRangeSnapshot requested, String leader, return 
CompletableFuture.failedFuture(new KVRangeException("Canceled once")); } }, executor, tags); - onDone.whenCompleteAsync((v, e) -> walSub.stop(), executor); + onDone.exceptionally(e -> null).thenComposeAsync(v -> walSub.stop(), executor); return onDone; } @@ -186,7 +184,7 @@ public Observable commitIndex() { } @Override - public CompletableFuture> retrieveCommitted(long fromIndex, long maxSize) { + public CompletableFuture retrieveCommitted(long fromIndex, long maxSize) { return raftNode.retrieveCommitted(fromIndex, maxSize); } @@ -253,7 +251,6 @@ public void receivePeerMessages(String fromPeer, List messages) { @Override public void tick() { - ticks.incrementAndGet(); raftNode.tick(); } @@ -272,7 +269,12 @@ public CompletableFuture close() { snapRestoreTaskPublisher.onComplete(); electionPublisher.onComplete(); syncStatePublisher.onComplete(); - return raftNode.stop().whenComplete((v, e) -> log.debug("KVRangeWAL closed")); + return raftNode.stop() + .exceptionally(e -> { + log.error("Raft node stop error", e); + return null; + }) + .whenComplete((v, e) -> log.debug("KVRangeWAL closed")); } @Override diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStorageEngine.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStorageEngine.java index 642caf051..808b1dc83 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStorageEngine.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStorageEngine.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store.wal; @@ -24,11 +24,11 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import com.google.protobuf.Struct; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import org.apache.bifromq.basekv.localengine.IKVEngine; -import org.apache.bifromq.basekv.localengine.IWALableKVEngineConfigurator; import org.apache.bifromq.basekv.localengine.IWALableKVSpace; import org.apache.bifromq.basekv.localengine.KVEngineFactory; import org.apache.bifromq.basekv.proto.KVRangeId; @@ -48,14 +48,14 @@ public class KVRangeWALStorageEngine implements IKVRangeWALStoreEngine { private final Map instances = Maps.newConcurrentMap(); private final IKVEngine kvEngine; - public KVRangeWALStorageEngine(String clusterId, - String overrideIdentity, - IWALableKVEngineConfigurator configurator) { + public KVRangeWALStorageEngine(String clusterId, String overrideIdentity, String engineType, Struct conf) { this.clusterId = clusterId; - kvEngine = KVEngineFactory.createWALable(overrideIdentity, configurator); + kvEngine = KVEngineFactory.createWALable(overrideIdentity, engineType, conf); log = MDCLogger.getLogger(KVRangeWALStorageEngine.class, "clusterId", clusterId, "storeId", kvEngine.id()); } + // configurator-based constructor removed; use type + struct only + @Override public void stop() { if (state.compareAndSet(State.STARTED, State.STOPPING)) { diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStore.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStore.java index 3af23494d..2ea81062a 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStore.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStore.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, 
WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.wal; @@ -41,7 +41,6 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.Deque; -import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; import java.util.Optional; @@ -50,10 +49,12 @@ import java.util.function.Consumer; import org.apache.bifromq.baseenv.ZeroCopyParser; import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; import org.apache.bifromq.basekv.localengine.IWALableKVSpace; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.raft.ILogEntryIterator; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.LogEntry; import org.apache.bifromq.basekv.raft.proto.Snapshot; @@ -74,7 +75,6 @@ class KVRangeWALStore implements IKVRangeWALStore { private final TreeMap configEntryMap = Maps.newTreeMap(); private final Deque stabilizingIndices = new ConcurrentLinkedDeque<>(); private final Consumer onDestroy; - private final LogEntryIteratorPool logEntryIteratorPool; private long currentTerm = 0; private Voting currentVoting; private Snapshot latestSnapshot; @@ -90,7 +90,6 @@ class KVRangeWALStore implements IKVRangeWALStore { this.onDestroy = onDestroy; log = MDCLogger.getLogger(KVRangeWALStore.class, "clusterId", clusterId, "storeId", storeId, "rangeId", KVRangeIdUtil.toString(rangeId)); - logEntryIteratorPool = new LogEntryIteratorPool(kvSpace); load(); } @@ -160,7 +159,8 @@ public void applySnapshot(Snapshot snapshot) { } } log.trace("Truncating logs before index[{}]", truncateBeforeIndex); - try 
(IKVSpaceIterator it = kvSpace.newIterator()) { + try (IKVSpaceRefreshableReader reader = kvSpace.reader();) { + IKVSpaceIterator it = reader.newIterator(); // truncate log entry writer.clear(Boundary.newBuilder() .setStartKey(logEntriesKeyPrefixInfix(0)) @@ -235,8 +235,8 @@ public Optional entryAt(long index) { if (index < firstIndex() || index > lastIndex()) { return Optional.empty(); } - try { - ByteString data = kvSpace.get(logEntryKey(logEntriesKeyInfix, index)).get(); + try (IKVSpaceRefreshableReader reader = kvSpace.reader()) { + ByteString data = reader.get(logEntryKey(logEntriesKeyInfix, index)).get(); return Optional.of(ZeroCopyParser.parse(data, LogEntry.parser())); } catch (Throwable e) { log.error("Failed to parse log entry[index={}]", index, e); @@ -245,7 +245,7 @@ public Optional entryAt(long index) { } @Override - public Iterator entries(long lo, long hi, long maxSize) { + public ILogEntryIterator entries(long lo, long hi, long maxSize) { if (lo < firstIndex()) { throw new IndexOutOfBoundsException( "lo[" + lo + "] must not be less than firstIndex[" + firstIndex() + "]"); @@ -257,7 +257,7 @@ public Iterator entries(long lo, long hi, long maxSize) { if (maxSize < 0) { maxSize = Long.MAX_VALUE; } - return logEntryIteratorPool.acquire(lo, hi, maxSize, logEntriesKeyInfix); + return new LogEntryIterator(kvSpace, lo, hi, maxSize, logEntriesKeyInfix); } @Override @@ -363,18 +363,20 @@ private void asyncFlush() { } private void load() { - loadLogEntryInfix(); - loadVoting(); - loadCurrentTerm(); - loadLatestSnapshot(); - loadConfigEntryIndexes(); - loadLastIndex(); + try (IKVSpaceRefreshableReader reader = kvSpace.reader()) { + loadLogEntryInfix(reader); + loadVoting(reader); + loadCurrentTerm(reader); + loadLatestSnapshot(reader); + loadConfigEntryIndexes(reader); + loadLastIndex(reader); + } trace("New raft state storage loaded"); } - private void loadVoting() { + private void loadVoting(IKVSpaceRefreshableReader reader) { try { - Optional votingBytes = 
kvSpace.get(KEY_CURRENT_VOTING_BYTES); + Optional votingBytes = reader.get(KEY_CURRENT_VOTING_BYTES); if (votingBytes.isPresent()) { currentVoting = ZeroCopyParser.parse(votingBytes.get(), Voting.parser()); } @@ -383,32 +385,31 @@ private void loadVoting() { } } - private void loadCurrentTerm() { - currentTerm = kvSpace.get(KEY_CURRENT_TERM_BYTES).map(KVUtil::toLong).orElse(0L); + private void loadCurrentTerm(IKVSpaceRefreshableReader reader) { + currentTerm = reader.get(KEY_CURRENT_TERM_BYTES).map(KVUtil::toLong).orElse(0L); } - private void loadLatestSnapshot() { + private void loadLatestSnapshot(IKVSpaceRefreshableReader reader) { try { - ByteString latestSnapshotBytes = kvSpace.get(KEY_LATEST_SNAPSHOT_BYTES).get(); + ByteString latestSnapshotBytes = reader.get(KEY_LATEST_SNAPSHOT_BYTES).get(); latestSnapshot = ZeroCopyParser.parse(latestSnapshotBytes, Snapshot.parser()); } catch (InvalidProtocolBufferException e) { throw new KVRangeStoreException("Failed to parse snapshot", e); } } - private void loadConfigEntryIndexes() { - try (IKVSpaceIterator it = kvSpace.newIterator()) { - ByteString prefix = KEY_CONFIG_ENTRY_INDEXES_BYTES; - for (it.seek(prefix); it.isValid() && it.key().startsWith(prefix); it.next()) { - long configEntryIndex = it.value().asReadOnlyByteBuffer().getLong(); - configEntryMap.put(configEntryIndex, loadConfigEntry(configEntryIndex)); - } + private void loadConfigEntryIndexes(IKVSpaceRefreshableReader reader) { + IKVSpaceIterator it = reader.newIterator(); + ByteString prefix = KEY_CONFIG_ENTRY_INDEXES_BYTES; + for (it.seek(prefix); it.isValid() && it.key().startsWith(prefix); it.next()) { + long configEntryIndex = it.value().asReadOnlyByteBuffer().getLong(); + configEntryMap.put(configEntryIndex, loadConfigEntry(configEntryIndex)); } } private ClusterConfig loadConfigEntry(long configEntryIndex) { - try { - ByteString data = kvSpace.get(logEntryKey(logEntriesKeyInfix, configEntryIndex)).get(); + try (IKVSpaceRefreshableReader reader = 
kvSpace.reader()) { + ByteString data = reader.get(logEntryKey(logEntriesKeyInfix, configEntryIndex)).get(); LogEntry logEntry = ZeroCopyParser.parse(data, LogEntry.parser()); assert logEntry.hasConfig(); return logEntry.getConfig(); @@ -420,19 +421,18 @@ private ClusterConfig loadConfigEntry(long configEntryIndex) { } } - private void loadLastIndex() { - try (IKVSpaceIterator it = kvSpace.newIterator()) { - it.seekToLast(); - if (it.isValid() && it.key().startsWith(KEY_PREFIX_LOG_ENTRIES_BYTES)) { - lastIndex = KVRangeWALKeys.parseLogIndex(it.key()); - } else { - lastIndex = latestSnapshot.getIndex(); - } + private void loadLastIndex(IKVSpaceRefreshableReader reader) { + IKVSpaceIterator it = reader.newIterator(); + it.seekToLast(); + if (it.isValid() && it.key().startsWith(KEY_PREFIX_LOG_ENTRIES_BYTES)) { + lastIndex = KVRangeWALKeys.parseLogIndex(it.key()); + } else { + lastIndex = latestSnapshot.getIndex(); } } - private void loadLogEntryInfix() { - logEntriesKeyInfix = kvSpace.get(KEY_LOG_ENTRIES_INCAR).map(KVUtil::toInt).orElse(0); + private void loadLogEntryInfix(IKVSpaceRefreshableReader reader) { + logEntriesKeyInfix = reader.get(KEY_LOG_ENTRIES_INCAR).map(KVUtil::toInt).orElse(0); } private void trace(String msg, Object... 
args) { diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java index 6e423bbe7..87aaa12c0 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java @@ -24,6 +24,7 @@ import java.util.Map; import java.util.NavigableMap; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicBoolean; @@ -48,6 +49,7 @@ class KVRangeWALSubscription implements IKVRangeWALSubscription { private final CompositeDisposable disposables = new CompositeDisposable(); private final AtomicBoolean fetching = new AtomicBoolean(); private final AtomicBoolean stopped = new AtomicBoolean(); + private final CompletableFuture stopSign = new CompletableFuture<>(); private final AtomicLong lastFetchedIdx = new AtomicLong(); private final ConcurrentSkipListMap pendingApplies = new ConcurrentSkipListMap<>(); @@ -96,12 +98,20 @@ class KVRangeWALSubscription implements IKVRangeWALSubscription { } @Override - public void stop() { + public CompletionStage stop() { if (stopped.compareAndSet(false, true)) { disposables.dispose(); fetchRunner.cancelAll(); applyRunner.cancelAll(); + CompletableFuture + .allOf(fetchRunner.awaitDone().toCompletableFuture(), applyRunner.awaitDone().toCompletableFuture()) + .exceptionally(e -> { + log.error("WAL Subscripiton stop error", e); + return null; + }) + .whenComplete((v, e) -> stopSign.complete(null)); } + return stopSign; } private void scheduleFetchWAL() { @@ -138,6 +148,7 @@ private CompletableFuture fetchWAL() { break; } } + logEntries.close(); if (entry != null) { 
if (hasMore) { lastFetchedIdx.set(Math.max(entry.getIndex() - 1, lastFetchedIdx.get())); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/LogEntryIterator.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/LogEntryIterator.java new file mode 100644 index 000000000..bd898387d --- /dev/null +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/LogEntryIterator.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.store.wal; + +import static org.apache.bifromq.basekv.store.wal.KVRangeWALKeys.logEntriesKeyPrefixInfix; +import static org.apache.bifromq.basekv.store.wal.KVRangeWALKeys.logEntryKey; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.upperBound; + +import com.google.protobuf.ByteString; +import java.util.NoSuchElementException; +import org.apache.bifromq.baseenv.ZeroCopyParser; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; +import org.apache.bifromq.basekv.localengine.IWALableKVSpace; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.raft.ILogEntryIterator; +import org.apache.bifromq.basekv.raft.proto.LogEntry; +import org.apache.bifromq.basekv.store.exception.KVRangeStoreException; + +final class LogEntryIterator implements ILogEntryIterator { + private final IKVSpaceIterator iterator; + private final long endIndex; + private final long maxSize; + private final Runnable cleanable; + + private long currentIndex; + private long accumulatedSize; + + LogEntryIterator(IWALableKVSpace kvSpace, long startIndex, long endIndex, long maxSize, int logEntriesKeyInfix) { + IKVSpaceRefreshableReader reader = kvSpace.reader(); + ByteString startBound = logEntriesKeyPrefixInfix(logEntriesKeyInfix); + ByteString endBound = upperBound(logEntriesKeyPrefixInfix(logEntriesKeyInfix)); + this.iterator = reader.newIterator(Boundary.newBuilder() + .setStartKey(startBound) + .setEndKey(endBound) + .build()); + this.endIndex = endIndex; + this.maxSize = maxSize; + this.currentIndex = startIndex; + this.accumulatedSize = 0; + ByteString startKey = logEntryKey(logEntriesKeyInfix, startIndex); + iterator.seek(startKey); + cleanable = new NativeResource(reader, iterator); + } + + @Override + public boolean hasNext() { + if (currentIndex >= endIndex || accumulatedSize > maxSize || !iterator.isValid()) { + return false; + 
} + return true; + } + + @Override + public LogEntry next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + try { + ByteString value = iterator.value(); + currentIndex++; + LogEntry entry = ZeroCopyParser.parse(value, LogEntry.parser()); + accumulatedSize += entry.getData().size(); + iterator.next(); + return entry; + } catch (Throwable e) { + throw new KVRangeStoreException("Log data corruption", e); + } + } + + @Override + public void close() { + cleanable.run(); + } + + private record NativeResource(IKVSpaceRefreshableReader reader, IKVSpaceIterator itr) implements Runnable { + @Override + public void run() { + itr.close(); + reader.close(); + } + } +} diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/LogEntryIteratorPool.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/LogEntryIteratorPool.java deleted file mode 100644 index 70b0aad29..000000000 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/LogEntryIteratorPool.java +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.store.wal; - -import static org.apache.bifromq.basekv.store.wal.KVRangeWALKeys.logEntryKey; - -import org.apache.bifromq.baseenv.ZeroCopyParser; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.IWALableKVSpace; -import org.apache.bifromq.basekv.raft.proto.LogEntry; -import org.apache.bifromq.basekv.store.exception.KVRangeStoreException; -import com.google.protobuf.ByteString; -import java.util.Iterator; -import java.util.NoSuchElementException; -import java.util.Queue; -import java.util.concurrent.ConcurrentLinkedQueue; - -/** - * Manages a pool of LogEntryIterators to avoid the overhead of creating new IKVSpaceIterators each time. - */ -class LogEntryIteratorPool { - private final IWALableKVSpace kvSpace; - private final Queue pool = new ConcurrentLinkedQueue<>(); - - LogEntryIteratorPool(IWALableKVSpace kvSpace) { - this.kvSpace = kvSpace; - } - - /** - * Acquires a new iterator from the pool or creates a new one if the pool is empty. - * - * @param startIndex the starting index (inclusive) - * @param endIndex the ending index (exclusive) - * @param maxSize maximum accumulated size of data to be read - * @param logEntriesKeyInfix current log entries key infix - * @return an iterator over LogEntry objects - */ - Iterator acquire(long startIndex, long endIndex, long maxSize, int logEntriesKeyInfix) { - PooledLogEntryIterator it = pool.poll(); - if (it == null) { - it = new PooledLogEntryIterator(kvSpace.newIterator(), this); - } - it.refresh(startIndex, endIndex, maxSize, logEntriesKeyInfix); - return it; - } - - /** - * Release the iterator back to the pool. - */ - void release(PooledLogEntryIterator it) { - pool.offer(it); - } - - /** - * A pooled LogEntryIterator implementation that implements Iterator. 
- */ - static class PooledLogEntryIterator implements Iterator { - private final IKVSpaceIterator iterator; - private final LogEntryIteratorPool pool; - - private long endIndex; - private long maxSize; - private long currentIndex; - private long accumulatedSize; - private boolean released; - - PooledLogEntryIterator(IKVSpaceIterator iterator, LogEntryIteratorPool pool) { - this.iterator = iterator; - this.pool = pool; - } - - private void refresh(long startIndex, long endIndex, long maxSize, int logEntriesKeyInfix) { - this.endIndex = endIndex; - this.maxSize = maxSize; - this.currentIndex = startIndex; - this.accumulatedSize = 0; - this.released = false; - - ByteString startKey = logEntryKey(logEntriesKeyInfix, startIndex); - - iterator.refresh(); - iterator.seek(startKey); - } - - @Override - public boolean hasNext() { - if (released) { - return false; - } - if (currentIndex >= endIndex || accumulatedSize > maxSize || !iterator.isValid()) { - releaseIfNotAlready(); - return false; - } - return true; - } - - @Override - public LogEntry next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - try { - ByteString value = iterator.value(); - currentIndex++; - LogEntry entry = ZeroCopyParser.parse(value, LogEntry.parser()); - accumulatedSize += entry.getData().size(); - iterator.next(); - return entry; - } catch (Throwable e) { - throw new KVRangeStoreException("Log data corruption", e); - } - } - - private void releaseIfNotAlready() { - if (!released) { - released = true; - pool.release(this); - } - } - } -} diff --git a/base-kv/base-kv-store-server/src/main/resources/META-INF/services/org.apache.bifromq.basekv.store.range.hinter.IKVRangeSplitHinterFactory b/base-kv/base-kv-store-server/src/main/resources/META-INF/services/org.apache.bifromq.basekv.store.range.hinter.IKVRangeSplitHinterFactory new file mode 100644 index 000000000..d6e668e46 --- /dev/null +++ 
b/base-kv/base-kv-store-server/src/main/resources/META-INF/services/org.apache.bifromq.basekv.store.range.hinter.IKVRangeSplitHinterFactory @@ -0,0 +1 @@ +org.apache.bifromq.basekv.store.range.hinter.MutationKVLoadBasedSplitHinterFactory diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/TestCoProc.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/TestCoProc.java index fa543c52a..169f3d3d8 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/TestCoProc.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/TestCoProc.java @@ -24,9 +24,9 @@ import java.util.concurrent.CompletableFuture; import java.util.function.Supplier; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; -import org.apache.bifromq.basekv.store.api.IKVReader; +import org.apache.bifromq.basekv.store.api.IKVRangeReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.basekv.store.api.IKVWriter; import org.apache.bifromq.basekv.store.proto.ROCoProcInput; import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; @@ -34,21 +34,21 @@ import org.apache.bifromq.basekv.store.proto.RWCoProcOutput; public class TestCoProc implements IKVRangeCoProc { - private final Supplier rangeReaderProvider; + private final Supplier rangeReaderProvider; - public TestCoProc(KVRangeId id, Supplier rangeReaderProvider) { + public TestCoProc(KVRangeId id, Supplier rangeReaderProvider) { this.rangeReaderProvider = rangeReaderProvider; } @Override - public CompletableFuture query(ROCoProcInput input, IKVReader reader) { + public CompletableFuture query(ROCoProcInput input, IKVRangeReader reader) { // get return CompletableFuture.completedFuture( - ROCoProcOutput.newBuilder().setRaw(reader.get(input.getRaw()).orElse(ByteString.EMPTY)).build()); + 
ROCoProcOutput.newBuilder().setRaw(reader.get(input.getRaw()).get()).build()); } @Override - public Supplier mutate(RWCoProcInput input, IKVReader reader, IKVWriter client, boolean isLeader) { + public Supplier mutate(RWCoProcInput input, IKVRangeReader reader, IKVWriter client, boolean isLeader) { String[] str = input.getRaw().toStringUtf8().split("_"); ByteString key = ByteString.copyFromUtf8(str[0]); ByteString value = ByteString.copyFromUtf8(str[1]); diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/TestCoProcFactory.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/TestCoProcFactory.java index b47e712c2..ff1e4b93e 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/TestCoProcFactory.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/TestCoProcFactory.java @@ -19,16 +19,16 @@ package org.apache.bifromq.basekv; +import java.util.function.Supplier; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; import org.apache.bifromq.basekv.store.api.IKVRangeCoProcFactory; -import java.util.function.Supplier; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; public class TestCoProcFactory implements IKVRangeCoProcFactory { @Override public IKVRangeCoProc createCoProc(String clusterId, String storeId, KVRangeId id, - Supplier readerProvider) { + Supplier readerProvider) { return new TestCoProc(id, readerProvider); } } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/annotation/Cluster.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/annotation/Cluster.java index b677521a8..544e80739 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/annotation/Cluster.java +++ 
b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/annotation/Cluster.java @@ -34,4 +34,11 @@ int installSnapshotTimeoutTick() default 2000; boolean asyncAppend() default true; + + // Extra knobs for deterministic tests + int snapshotSyncIdleTimeoutSec() default 5; + + int zombieTimeoutSec() default 5; + + int mergeTimeoutSec() default 5; } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/server/AgentHostStoreMessengerTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/server/AgentHostStoreMessengerTest.java index b2b8ea35d..757ae7799 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/server/AgentHostStoreMessengerTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/server/AgentHostStoreMessengerTest.java @@ -20,11 +20,19 @@ package org.apache.bifromq.basekv.server; import static org.apache.bifromq.basekv.Constants.toBaseKVAgentId; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import com.google.protobuf.ByteString; +import io.reactivex.rxjava3.observers.TestObserver; +import io.reactivex.rxjava3.subjects.PublishSubject; +import java.lang.reflect.Method; import org.apache.bifromq.basecluster.IAgentHost; import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; import org.apache.bifromq.basecluster.agent.proto.AgentMessage; @@ -35,10 +43,6 @@ import org.apache.bifromq.basekv.proto.KVRangeMessage; import org.apache.bifromq.basekv.proto.StoreMessage; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; -import com.google.protobuf.ByteString; -import io.reactivex.rxjava3.observers.TestObserver; 
-import io.reactivex.rxjava3.subjects.PublishSubject; -import java.lang.reflect.Method; import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.testng.annotations.Test; @@ -70,6 +74,7 @@ protected void doSetup(Method method) { when(agent.register(srcStore)).thenReturn(srcStoreAgentMember); when(agent.register(targetStore)).thenReturn(tgtStoreAgentMember); when(tgtStoreAgentMember.receive()).thenReturn(tgtStoreMessageSubject); + when(srcStoreAgentMember.receive()).thenReturn(PublishSubject.create()); } @Test @@ -158,4 +163,25 @@ public void receiveBroadcast() { testObserver.awaitCount(1); assertEquals(testObserver.values().get(0).getPayload().getHostStoreId(), targetStore); } + + @Test + public void sendToSelfShortcut() { + AgentHostStoreMessenger messenger = new AgentHostStoreMessenger(agentHost, clusterId, targetStore); + TestObserver testObserver = TestObserver.create(); + messenger.receive().subscribe(testObserver); + + StoreMessage message = StoreMessage.newBuilder() + .setFrom(targetStore) + .setSrcRange(targetRange) + .setPayload(KVRangeMessage.newBuilder().setHostStoreId(targetStore).setRangeId(targetRange).build()) + .build(); + + messenger.send(message); + + testObserver.awaitCount(1); + assertEquals(testObserver.values().get(0), message); + + verify(tgtStoreAgentMember, never()).multicast(anyString(), any(), anyBoolean()); + verify(tgtStoreAgentMember, never()).broadcast(any(), anyBoolean()); + } } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/server/QueryPipelineTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/server/QueryPipelineTest.java index 74bc66d4c..c0493201b 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/server/QueryPipelineTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/server/QueryPipelineTest.java @@ -14,16 +14,26 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either 
express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.server; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; +import com.google.protobuf.ByteString; +import io.grpc.stub.ServerCallStreamObserver; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; import org.apache.bifromq.basekv.MockableTest; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.store.IKVRangeStore; @@ -34,14 +44,6 @@ import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; import org.apache.bifromq.basekv.store.proto.ReplyCode; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; -import com.google.protobuf.ByteString; -import io.grpc.stub.ServerCallStreamObserver; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.TimeUnit; import org.mockito.Mock; import org.testng.annotations.Test; @@ -226,4 +228,86 @@ public void errorCodeConversion() { getReply = pipeline.handleRequest("_", getRequest).join(); assertEquals(getReply.getCode(), ReplyCode.InternalError); } + + @Test + public void skipCanceledQueuedTask() { + boolean linearized = false; + QueryPipeline pipeline = new QueryPipeline(rangeStore, linearized, streamObserver); + KVRangeId rangeId = KVRangeIdUtil.generate(); + + ByteString key1 = ByteString.copyFromUtf8("k1"); + ByteString key2 = ByteString.copyFromUtf8("k2"); + + CompletableFuture> firstFuture = new 
CompletableFuture<>(); + + when(rangeStore.get(1, rangeId, key1, linearized)).thenReturn(firstFuture); + when(rangeStore.get(1, rangeId, key2, linearized)) + .thenReturn(CompletableFuture.completedFuture(Optional.empty())); + + KVRangeRORequest r1 = KVRangeRORequest.newBuilder() + .setReqId(1) + .setVer(1) + .setKvRangeId(rangeId) + .setGetKey(key1) + .build(); + KVRangeRORequest r2 = KVRangeRORequest.newBuilder() + .setReqId(2) + .setVer(1) + .setKvRangeId(rangeId) + .setGetKey(key2) + .build(); + + CompletableFuture f1 = pipeline.handleRequest("_", r1); + CompletableFuture f2 = pipeline.handleRequest("_", r2); + + f2.cancel(true); + + firstFuture.complete(Optional.empty()); + f1.join(); + + verify(rangeStore, times(1)).get(1, rangeId, key1, linearized); + verify(rangeStore, times(0)).get(1, rangeId, key2, linearized); + assertTrue(f2.isCancelled()); + } + + @Test + public void skipQueuedTaskAfterPipelineClosed() { + boolean linearized = false; + QueryPipeline pipeline = new QueryPipeline(rangeStore, linearized, streamObserver); + KVRangeId rangeId = KVRangeIdUtil.generate(); + + ByteString key1 = ByteString.copyFromUtf8("k1"); + ByteString key2 = ByteString.copyFromUtf8("k2"); + + CompletableFuture> firstFuture = new CompletableFuture<>(); + + when(rangeStore.get(1, rangeId, key1, linearized)).thenReturn(firstFuture); + when(rangeStore.get(1, rangeId, key2, linearized)) + .thenReturn(CompletableFuture.completedFuture(Optional.empty())); + + KVRangeRORequest r1 = KVRangeRORequest.newBuilder() + .setReqId(1) + .setVer(1) + .setKvRangeId(rangeId) + .setGetKey(key1) + .build(); + KVRangeRORequest r2 = KVRangeRORequest.newBuilder() + .setReqId(2) + .setVer(1) + .setKvRangeId(rangeId) + .setGetKey(key2) + .build(); + + CompletableFuture f1 = pipeline.handleRequest("_", r1); + CompletableFuture f2 = pipeline.handleRequest("_", r2); + + pipeline.onCompleted(); + + firstFuture.complete(Optional.empty()); + f1.join(); + + verify(rangeStore, times(1)).get(1, rangeId, key1, 
linearized); + verify(rangeStore, times(0)).get(1, rangeId, key2, linearized); + assertTrue(pipeline.isClosed()); + } } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeBootstrapTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeBootstrapTest.java index 3bcf702bb..142dce0e5 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeBootstrapTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeBootstrapTest.java @@ -20,6 +20,9 @@ package org.apache.bifromq.basekv.store; import static java.util.Collections.emptyMap; +import static org.apache.bifromq.basekv.localengine.StructUtil.toValue; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; import static org.apache.bifromq.basekv.proto.State.StateType.Normal; import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.testng.Assert.assertEquals; @@ -27,6 +30,7 @@ import static org.testng.Assert.assertTrue; import com.google.protobuf.Any; +import com.google.protobuf.Struct; import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.subjects.PublishSubject; import java.io.File; @@ -48,8 +52,6 @@ import org.apache.bifromq.baseenv.EnvProvider; import org.apache.bifromq.basekv.MockableTest; import org.apache.bifromq.basekv.TestCoProcFactory; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngineConfigurator; import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeMessage; @@ -88,12 +90,18 @@ protected void doSetup(Method method) { 
EnvProvider.INSTANCE.newThreadFactory("bg-task-executor")); dbRootDir = Files.createTempDirectory(""); - (((RocksDBCPableKVEngineConfigurator) options.getDataEngineConfigurator())) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME) - .toString()) - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME).toString()); - ((RocksDBWALableKVEngineConfigurator) options.getWalEngineConfigurator()) - .dbRootDir(Paths.get(dbRootDir.toString(), DB_WAL_NAME).toString()); + Struct dataConf = options.getDataEngineConf().toBuilder() + .putFields(DB_CHECKPOINT_ROOT_DIR, + toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME).toString())) + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_NAME).toString())) + .build(); + options.setDataEngineType(options.getDataEngineType()); + options.setDataEngineConf(dataConf); + Struct walConf = options.getWalEngineConf().toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_WAL_NAME).toString())) + .build(); + options.setWalEngineType(options.getWalEngineType()); + options.setWalEngineConf(walConf); rangeStore = new KVRangeStore("testCluster", diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeMessengerTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeMessengerTest.java index 84b4b2bb6..f242f7e84 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeMessengerTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeMessengerTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store; @@ -23,12 +23,9 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; -import org.apache.bifromq.basekv.MockableTest; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.proto.KVRangeMessage; -import org.apache.bifromq.basekv.proto.StoreMessage; -import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import io.reactivex.rxjava3.observers.TestObserver; import io.reactivex.rxjava3.subjects.PublishSubject; import java.lang.reflect.Method; @@ -36,6 +33,12 @@ import java.util.concurrent.TimeUnit; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.MockableTest; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.KVRangeMessage; +import org.apache.bifromq.basekv.proto.StoreMessage; +import org.apache.bifromq.basekv.store.exception.KVRangeException; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.testng.annotations.Test; @@ -50,9 +53,6 @@ protected void doSetup(Method method) { incomingStoreMsg = PublishSubject.create(); } - protected void doTearDown(Method method) { - } - @Test public void send() { String srcStoreId = "srcStoreId"; @@ -94,6 +94,35 @@ public void receiveSend() { assertEquals(receivedMsg.getRangeId(), srcRangeId); } + @Test + public void receiveWithoutRangeId() { + String srcStoreId = "srcStoreId"; + KVRangeId srcRangeId = KVRangeIdUtil.generate(); + String targetStoreId = "targetStoreId"; + KVRangeId targetRangeId = KVRangeIdUtil.generate(); + when(storeMessenger.receive()).thenReturn(incomingStoreMsg); + KVRangeMessenger messenger = new KVRangeMessenger(targetStoreId, targetRangeId, storeMessenger); + TestObserver rangeMsgObserver = TestObserver.create(); + 
messenger.receive().subscribe(rangeMsgObserver); + + // Build a message without rangeId set but with matching hostStoreId + KVRangeMessage rangeMessage = KVRangeMessage.newBuilder() + .setHostStoreId(targetStoreId) + .build(); + StoreMessage storeMessage = StoreMessage.newBuilder() + .setFrom(srcStoreId) + .setSrcRange(srcRangeId) + .setPayload(rangeMessage) + .build(); + + incomingStoreMsg.onNext(storeMessage); + rangeMsgObserver.awaitCount(1); + + KVRangeMessage receivedMsg = rangeMsgObserver.values().get(0); + assertEquals(receivedMsg.getHostStoreId(), srcStoreId); + assertEquals(receivedMsg.getRangeId(), srcRangeId); + } + @SneakyThrows @Test public void ignoreWrongTarget() { @@ -167,4 +196,25 @@ public void once() { incomingStoreMsg.onComplete(); await().until(() -> onceFuture1.isCompletedExceptionally()); } + + @Test + public void onceOnError() { + String targetStoreId = "targetStoreId"; + KVRangeId targetRangeId = KVRangeIdUtil.generate(); + when(storeMessenger.receive()).thenReturn(incomingStoreMsg); + KVRangeMessenger messenger = new KVRangeMessenger(targetStoreId, targetRangeId, storeMessenger); + + CompletableFuture onceFuture = messenger.once(msg -> true); + incomingStoreMsg.onError(new RuntimeException("Mocked exception")); + + await().until(onceFuture::isCompletedExceptionally); + try { + onceFuture.join(); + fail(); + } catch (RuntimeException e) { + // CompletionException or directly runtime wraps the cause + Throwable cause = e.getCause() != null ? 
e.getCause() : e; + assertTrue(cause instanceof KVRangeException.TryLater); + } + } } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterConfigChangeTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterConfigChangeTest.java index 17bc5e5e4..dc3dc2376 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterConfigChangeTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterConfigChangeTest.java @@ -14,25 +14,32 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store; import static java.util.Collections.emptySet; import static org.awaitility.Awaitility.await; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.annotation.Cluster; -import org.apache.bifromq.basekv.proto.KVRangeId; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.annotation.Cluster; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.State; +import org.apache.bifromq.basekv.store.exception.KVRangeException; import org.testng.annotations.Test; @Slf4j @@ -308,4 +315,54 @@ public void configChangeDoNotAffectMutation() { cluster.put(setting.leader, 
ver, rangeId, ByteString.copyFromUtf8("key"), ByteString.copyFromUtf8("value")) .toCompletableFuture().join(); } + + @Test(groups = "integration") + public void correlateIdMismatchRollsBackToNormalAndTryLater() { + KVRangeId rangeId = cluster.genesisKVRangeId(); + // ensure cluster ready and capture current leader/voters + KVRangeConfig setting = cluster.awaitAllKVRangeReady(rangeId, 1, 40); + String leader = setting.leader; + Set currentVoters = Sets.newHashSet(setting.clusterConfig.getVotersList()); + + // Phase 1: hold all WAL messages from leader to step through deterministically + try (KVRangeStoreTestCluster.HoldHandle hold = cluster.holdIf( + m -> m.getFrom().equals(leader) && m.getPayload().hasWalRaftMessages())) { + CompletableFuture changeFuture = cluster + .changeReplicaConfig(leader, setting.ver, rangeId, currentVoters, Sets.newHashSet()) + .toCompletableFuture(); + + long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(15); + boolean inConfigChanging = false; + while (System.nanoTime() < deadline) { + hold.releaseOne(); + KVRangeDescriptor rd = cluster.getKVRange(leader, rangeId); + if (rd != null && rd.getState() == State.StateType.ConfigChanging) { + inConfigChanging = true; + break; + } + } + assertTrue(inConfigChanging); + + long latestVer = cluster.kvRangeSetting(rangeId).ver; + // Ensure election messages are not blocked + hold.releaseAll(); + String newLeader = nonLeaderStore(setting); + cluster.transferLeader(leader, latestVer, rangeId, newLeader).toCompletableFuture().join(); + + Throwable thrown = null; + try { + changeFuture.join(); + } catch (Throwable t) { + thrown = (t instanceof CompletionException && t.getCause() != null) ? 
t.getCause() : t; + } + assertTrue(thrown instanceof KVRangeException.TryLater); + } + + // FSM should rollback to Normal across replicas + cluster.awaitKVRangeStateOnAllStores(rangeId, State.StateType.Normal, 30); + + // voters should remain unchanged + KVRangeConfig after = cluster.kvRangeSetting(rangeId); + assertEquals(Sets.newHashSet(after.clusterConfig.getVotersList()), currentVoters); + } } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterMergeEdgeCasesTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterMergeEdgeCasesTest.java new file mode 100644 index 000000000..7f0917059 --- /dev/null +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterMergeEdgeCasesTest.java @@ -0,0 +1,777 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.store; + +import static com.google.protobuf.ByteString.copyFromUtf8; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.NULL_BOUNDARY; +import static org.awaitility.Awaitility.await; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import java.time.Duration; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.CompletionException; +import java.util.concurrent.atomic.AtomicReference; +import lombok.SneakyThrows; +import org.apache.bifromq.basekv.annotation.Cluster; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.State; +import org.apache.bifromq.basekv.store.exception.KVRangeException.BadRequest; +import org.apache.bifromq.basekv.store.exception.KVRangeException.TryLater; +import org.apache.bifromq.basekv.utils.BoundaryUtil; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; +import org.testng.annotations.Test; + +public class KVRangeStoreClusterMergeEdgeCasesTest extends KVRangeStoreClusterTestTemplate { + + @Cluster(initVoters = 1) + @Test(groups = "integration") + public void mergeMissingRollback() { + KVRangeId merger = cluster.genesisKVRangeId(); + String leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, merger); + KVRangeConfig before = cluster.kvRangeSetting(merger); + + KVRangeId missing = KVRangeIdUtil.generate(); + + try { + cluster.merge(leader, before.ver, merger, missing).toCompletableFuture().join(); + } catch (CompletionException ex) { + Throwable cause = ex.getCause(); + assertTrue(cause instanceof BadRequest); + } + + cluster.awaitKVRangeStateOnAllStores(merger, State.StateType.Normal, 60); + KVRangeConfig after = cluster.kvRangeSetting(merger); + assertEquals(before.ver, after.ver); + } + + @SneakyThrows + 
@Test(groups = "integration") + public void mispreparedTimeoutConsistentState() { + KVRangeId gid = cluster.genesisKVRangeId(); + String leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, gid); + cluster.split(leader, cluster.kvRangeSetting(gid).ver, gid, + copyFromUtf8("m")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 2); + KVRangeConfig r0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig r1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + AtomicReference merger; + AtomicReference mergee; + if (r0.boundary.hasEndKey() && BoundaryUtil.compare(r0.boundary.getEndKey(), r1.boundary.getStartKey()) <= 0) { + merger = new AtomicReference<>(r0); + mergee = new AtomicReference<>(r1); + } else { + merger = new AtomicReference<>(r1); + mergee = new AtomicReference<>(r0); + } + + cluster.awaitKVRangeReady(merger.get().leader, merger.get().id); + cluster.awaitKVRangeReady(mergee.get().leader, mergee.get().id); + + KVRangeConfig before = cluster.kvRangeSetting(merger.get().id); + + // drop PreparedMergeToReply/MergeRequest/MergeHelpRequest so it must self-timeout + try (AutoCloseable dropPMTReply = cluster.dropIf(m -> m.getPayload().hasPrepareMergeToReply() + && m.getPayload().getRangeId().equals(merger.get().id)); + AutoCloseable dropMHRequest = cluster.dropIf(m -> m.getPayload().hasMergeHelpRequest()); + KVRangeStoreTestCluster.HoldHandle blockMergeReq = cluster.holdIf(m -> m.getPayload().hasMergeRequest() + && m.getPayload().getRangeId().equals(merger.get().id))) { + try { + cluster.merge(merger.get().leader, before.ver, merger.get().id, mergee.get().id) + .toCompletableFuture().join(); + fail(); + } catch (CompletionException ex) { + assertTrue(ex.getCause() instanceof TryLater); + } + + // Merger self-timeouts and cancels, stays with original boundary + cluster.awaitKVRangeStateOnAllStores(merger.get().id, State.StateType.Normal, 60); + KVRangeConfig after = 
cluster.kvRangeSetting(merger.get().id); + assertTrue(after.ver >= before.ver + 1); + assertEquals(after.boundary, before.boundary); + + // Release MergeRequest to arrive late; + // merger(Normal) should send CancelMergingRequest, mergee back to Normal + blockMergeReq.releaseAll(); + cluster.awaitKVRangeStateOnAllStores(mergee.get().id, State.StateType.Normal, 60); + } + } + + @Test(groups = "integration") + public void prepareCondNotMetThenCancel() { + KVRangeId gid = cluster.genesisKVRangeId(); + String leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, gid); + cluster.split(leader, cluster.kvRangeSetting(gid).ver, gid, + copyFromUtf8("m")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 2); + KVRangeConfig r0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig r1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + AtomicReference merger; + AtomicReference mergee; + if (r0.boundary.hasEndKey() + && BoundaryUtil.compare(r0.boundary.getEndKey(), r1.boundary.getStartKey()) <= 0) { + merger = new AtomicReference<>(r0); + mergee = new AtomicReference<>(r1); + } else { + merger = new AtomicReference<>(r1); + mergee = new AtomicReference<>(r0); + } + + // Ensure merger and mergee are hosted on the same leader store, + // so PrepareMergeTo is handled by the real mergee instead of the "absent mergee" fast path. 
+ while (!merger.get().leader.equals(mergee.get().leader)) { + cluster.awaitKVRangeReady(mergee.get().leader, mergee.get().id); + try { + await().ignoreExceptions().atMost(java.time.Duration.ofSeconds(5)) + .until(() -> { + cluster.transferLeader(mergee.get().leader, mergee.get().ver, mergee.get().id, + merger.get().leader) + .toCompletableFuture().join(); + return cluster.kvRangeSetting(mergee.get().id).leader.equals(merger.get().leader); + }); + break; + } catch (Throwable t) { + merger.set(cluster.kvRangeSetting(cluster.allKVRangeIds().get(0))); + mergee.set(cluster.kvRangeSetting(cluster.allKVRangeIds().get(1))); + } + } + String s2 = cluster.addStore(); + KVRangeConfig mCfg = mergee.get(); + Set voters = new HashSet<>(mCfg.clusterConfig.getVotersList()); + Set learners = new HashSet<>(mCfg.clusterConfig.getLearnersList()); + learners.add(s2); + cluster.changeReplicaConfig(mCfg.leader, mCfg.ver, mCfg.id, voters, learners).toCompletableFuture().join(); + + try { + cluster.merge(merger.get().leader, merger.get().ver, merger.get().id, mergee.get().id) + .toCompletableFuture().join(); + } catch (CompletionException ex) { + assertTrue(ex.getCause() instanceof TryLater); + } + + cluster.awaitKVRangeStateOnAllStores(merger.get().id, State.StateType.Normal, 60); + KVRangeConfig after = cluster.kvRangeSetting(merger.get().id); + assertTrue(after.ver >= merger.get().ver + 1); + } + + @Test(groups = "integration") + public void mergeDoneRetryToMerged() { + KVRangeId gid = cluster.genesisKVRangeId(); + String leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, gid); + cluster.split(leader, cluster.kvRangeSetting(gid).ver, gid, + copyFromUtf8("m")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 2); + KVRangeConfig r0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig r1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + AtomicReference merger; + AtomicReference mergee; + if 
(r0.boundary.hasEndKey() + && BoundaryUtil.compare(r0.boundary.getEndKey(), r1.boundary.getStartKey()) <= 0) { + merger = new AtomicReference<>(r0); + mergee = new AtomicReference<>(r1); + } else { + merger = new AtomicReference<>(r1); + mergee = new AtomicReference<>(r0); + } + while (!merger.get().leader.equals(mergee.get().leader)) { + cluster.awaitKVRangeReady(mergee.get().leader, mergee.get().id); + try { + await().ignoreExceptions().atMost(Duration.ofSeconds(5)) + .until(() -> { + cluster.transferLeader(mergee.get().leader, mergee.get().ver, mergee.get().id, + merger.get().leader) + .toCompletableFuture().join(); + return cluster.kvRangeSetting(mergee.get().id).leader.equals(merger.get().leader); + }); + break; + } catch (Throwable t) { + merger.set(cluster.kvRangeSetting(cluster.allKVRangeIds().get(0))); + mergee.set(cluster.kvRangeSetting(cluster.allKVRangeIds().get(1))); + } + } + + try (AutoCloseable g = cluster.delayOnceIf(m -> + m.getPayload().hasMergeDoneReply() && m.getPayload().getRangeId().equals(merger.get().id), 200)) { + cluster.merge(merger.get().leader, merger.get().ver, merger.get().id, mergee.get().id) + .toCompletableFuture().join(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + cluster.awaitKVRangeStateOnAllStores(mergee.get().id, State.StateType.Merged, 60); + } + + @Test(groups = "integration") + public void nonAdjacentMergeeCanceledMerger() { + KVRangeId gid = cluster.genesisKVRangeId(); + String leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, gid); + // first split at "m" + cluster.split(leader, cluster.kvRangeSetting(gid).ver, gid, + copyFromUtf8("m")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 2); + KVRangeConfig rA = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig rB = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + KVRangeConfig rightHalf = rA.boundary.hasStartKey() ? 
rA : rB; + cluster.split(rightHalf.leader, rightHalf.ver, rightHalf.id, + copyFromUtf8("t")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 3); + + KVRangeConfig c0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig c1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + KVRangeConfig c2 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(2)); + KVRangeConfig left = c0.boundary.hasStartKey() ? (c1.boundary.hasStartKey() ? c2 : c1) : c0; + KVRangeConfig right; + if (!c0.boundary.hasEndKey()) { + right = c0; + } else if (!c1.boundary.hasEndKey()) { + right = c1; + } else { + right = c2; + } + + AtomicReference merger = new AtomicReference<>(left); + AtomicReference mergee = new AtomicReference<>(right); + cluster.awaitKVRangeReady(merger.get().leader, merger.get().id); + cluster.awaitKVRangeReady(mergee.get().leader, mergee.get().id); + + KVRangeConfig before = cluster.kvRangeSetting(merger.get().id); + try { + cluster.merge(merger.get().leader, before.ver, merger.get().id, mergee.get().id) + .toCompletableFuture().join(); + } catch (CompletionException ex) { + assertTrue(ex.getCause() instanceof TryLater); + } + + cluster.awaitKVRangeStateOnAllStores(merger.get().id, State.StateType.Normal, 60); + KVRangeConfig after = cluster.kvRangeSetting(merger.get().id); + assertTrue(after.ver >= before.ver + 1); + cluster.awaitKVRangeStateOnAllStores(mergee.get().id, State.StateType.Normal, 60); + } + + @Test(groups = "integration") + public void nonAdjacentMergerSelfCancel() { + KVRangeId gid = cluster.genesisKVRangeId(); + String leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, gid); + cluster.split(leader, cluster.kvRangeSetting(gid).ver, gid, + copyFromUtf8("m")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 2); + KVRangeConfig rA = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig rB = 
cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + KVRangeConfig rightHalf = rA.boundary.hasStartKey() ? rA : rB; + cluster.split(rightHalf.leader, rightHalf.ver, rightHalf.id, + copyFromUtf8("t")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 3); + + KVRangeConfig c0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig c1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + KVRangeConfig c2 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(2)); + KVRangeConfig left = c0.boundary.hasStartKey() ? (c1.boundary.hasStartKey() ? c2 : c1) : c0; + KVRangeConfig right; + if (!c0.boundary.hasEndKey()) { + right = c0; + } else if (!c1.boundary.hasEndKey()) { + right = c1; + } else { + right = c2; + } + + cluster.awaitAllKVRangeReady(left.id, left.ver, 40); + cluster.awaitAllKVRangeReady(right.id, right.ver, 40); + + AtomicReference merger = new AtomicReference<>(left); + AtomicReference mergee = new AtomicReference<>(right); + KVRangeConfig before = cluster.kvRangeSetting(merger.get().id); + + // Drop all CancelMergingRequest to merger so it must self-timeout + try (AutoCloseable dr = cluster.dropIf(m -> m.getPayload().hasCancelMergingRequest() + && m.getPayload().getRangeId().equals(merger.get().id))) { + try { + cluster.merge(merger.get().leader, before.ver, merger.get().id, mergee.get().id) + .toCompletableFuture().join(); + } catch (CompletionException ex) { + assertTrue(ex.getCause() instanceof TryLater); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + + cluster.awaitKVRangeStateOnAllStores(merger.get().id, State.StateType.Normal, 60); + KVRangeConfig after = cluster.kvRangeSetting(merger.get().id); + assertTrue(after.ver >= before.ver + 1); + cluster.awaitKVRangeStateOnAllStores(mergee.get().id, State.StateType.Normal, 60); + } + + @Test(groups = "integration") + @SneakyThrows + public void mispreparedBothCanceled() { + KVRangeId gid = cluster.genesisKVRangeId(); + String 
leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, gid); + cluster.split(leader, cluster.kvRangeSetting(gid).ver, gid, + copyFromUtf8("m")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 2); + KVRangeConfig r0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig r1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + AtomicReference merger; + AtomicReference mergee; + if (r0.boundary.hasEndKey() && BoundaryUtil.compare(r0.boundary.getEndKey(), r1.boundary.getStartKey()) <= 0) { + merger = new AtomicReference<>(r0); + mergee = new AtomicReference<>(r1); + } else { + merger = new AtomicReference<>(r1); + mergee = new AtomicReference<>(r0); + } + + cluster.awaitKVRangeReady(merger.get().leader, merger.get().id); + cluster.awaitKVRangeReady(mergee.get().leader, mergee.get().id); + + KVRangeConfig before = cluster.kvRangeSetting(merger.get().id); + + AutoCloseable dropMHRequest = cluster.dropIf(m -> m.getPayload().hasMergeHelpRequest()); + // One-way isolation: merger cannot receive PrepareMergeToReply and MergeRequest + try (AutoCloseable dropPMTReply = cluster.dropIf(m -> m.getPayload().hasPrepareMergeToReply() + && m.getPayload().getRangeId().equals(merger.get().id))) { + KVRangeStoreTestCluster.HoldHandle holdHandle = cluster.holdIf(m -> m.getPayload().hasMergeRequest() + && m.getPayload().getRangeId().equals(merger.get().id)); + try { + cluster.merge(merger.get().leader, before.ver, merger.get().id, mergee.get().id) + .toCompletableFuture().join(); + } catch (CompletionException ex) { + assertTrue(ex.getCause() instanceof TryLater); + } + // Merger self-timeouts and cancels + cluster.awaitKVRangeStateOnAllStores(merger.get().id, State.StateType.Normal, 60); + KVRangeConfig after = cluster.kvRangeSetting(merger.get().id); + assertTrue(after.ver >= before.ver + 1); + assertEquals(after.boundary, before.boundary); + } + + // Mergee should have at least one replica in WaitingForMerge 
(state split) + await().atMost(Duration.ofSeconds(30)).until(() -> { + for (String sid : cluster.allStoreIds()) { + KVRangeDescriptor rd = cluster.getKVRange(sid, mergee.get().id); + if (rd != null && rd.getState() == State.StateType.WaitingForMerge) { + return true; + } + } + return false; + }); + dropMHRequest.close(); + await().atMost(Duration.ofSeconds(30)).until(() -> { + for (String sid : cluster.allStoreIds()) { + KVRangeDescriptor rd = cluster.getKVRange(sid, mergee.get().id); + if (rd != null && rd.getState() == State.StateType.Normal + && rd.getBoundary().equals(mergee.get().boundary)) { + return true; + } + } + return false; + }); + } + + @Test(groups = "integration") + public void mispreparedButMerged() { + KVRangeId gid = cluster.genesisKVRangeId(); + String leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, gid); + cluster.split(leader, cluster.kvRangeSetting(gid).ver, gid, + copyFromUtf8("m")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 2); + KVRangeConfig r0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig r1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + AtomicReference merger; + AtomicReference mergee; + if (r0.boundary.hasEndKey() && BoundaryUtil.compare(r0.boundary.getEndKey(), r1.boundary.getStartKey()) <= 0) { + merger = new AtomicReference<>(r0); + mergee = new AtomicReference<>(r1); + } else { + merger = new AtomicReference<>(r1); + mergee = new AtomicReference<>(r0); + } + + cluster.awaitKVRangeReady(merger.get().leader, merger.get().id); + cluster.awaitKVRangeReady(mergee.get().leader, mergee.get().id); + + KVRangeConfig mBefore = cluster.kvRangeSetting(merger.get().id); + KVRangeConfig meBefore = cluster.kvRangeSetting(mergee.get().id); + + // drop PrepareMergeToReply and MergeHelpRequest to try best to let merger receive MergeRequest first + AutoCloseable dropMHRequest = cluster.dropIf(m -> m.getPayload().hasMergeHelpRequest()); + try 
(AutoCloseable dropPMTReply = cluster.dropIf(m -> m.getPayload().hasPrepareMergeToReply() + && m.getPayload().getRangeId().equals(merger.get().id))) { + cluster.merge(merger.get().leader, mBefore.ver, merger.get().id, mergee.get().id) + .toCompletableFuture().join(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + cluster.awaitKVRangeStateOnAllStores(merger.get().id, State.StateType.Normal, 120); + KVRangeConfig mAfter = cluster.kvRangeSetting(merger.get().id); + assertTrue(mAfter.ver >= mBefore.ver + 1); + assertEquals(mAfter.boundary, BoundaryUtil.combine(mBefore.boundary, meBefore.boundary)); + + cluster.awaitKVRangeStateOnAllStores(mergee.get().id, State.StateType.Merged, 120); + KVRangeConfig meAfter = cluster.kvRangeSetting(mergee.get().id); + assertTrue(meAfter.ver >= meBefore.ver + 1); + } + + @Test(groups = "integration") + @SneakyThrows + public void mergeDoneLostAndMergeeTimeout() { + KVRangeId gid = cluster.genesisKVRangeId(); + String leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, gid); + cluster.split(leader, cluster.kvRangeSetting(gid).ver, gid, + copyFromUtf8("m")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 2); + KVRangeConfig r0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig r1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + AtomicReference merger; + AtomicReference mergee; + if (r0.boundary.hasEndKey() + && BoundaryUtil.compare(r0.boundary.getEndKey(), r1.boundary.getStartKey()) <= 0) { + merger = new AtomicReference<>(r0); + mergee = new AtomicReference<>(r1); + } else { + merger = new AtomicReference<>(r1); + mergee = new AtomicReference<>(r0); + } + + // Drop all MergeDoneRequest to mergee so it stays in WaitingForMerge + try (AutoCloseable dropMergeDone = cluster.dropIf(m -> m.getPayload().hasMergeDoneRequest() + && m.getPayload().getRangeId().equals(mergee.get().id))) { + + // Trigger merge + KVRangeConfig mBefore = 
cluster.kvRangeSetting(merger.get().id); + KVRangeConfig meBefore = cluster.kvRangeSetting(mergee.get().id); + cluster.merge(merger.get().leader, mBefore.ver, mBefore.id, meBefore.id) + .toCompletableFuture().join(); + + // Wait for merger to return Normal with combined boundary + await().atMost(Duration.ofSeconds(120)).until(() -> { + KVRangeConfig mNow = cluster.kvRangeSetting(merger.get().id); + return mNow.boundary.equals(BoundaryUtil.combine(mBefore.boundary, meBefore.boundary)) + && mNow.ver >= mBefore.ver + 1; + }); + + await().atMost(Duration.ofSeconds(30)).until(() -> { + for (String sid : cluster.allStoreIds()) { + KVRangeDescriptor rd = cluster.getKVRange(sid, mergee.get().id); + if (rd != null && rd.getState() == State.StateType.WaitingForMerge) { + return true; + } + } + return false; + }); + } + + // Wait for mergee to return Merged state + await().atMost(Duration.ofSeconds(30)).until(() -> { + for (String sid : cluster.allStoreIds()) { + KVRangeDescriptor rd = cluster.getKVRange(sid, mergee.get().id); + if (rd != null && rd.getState() == State.StateType.Merged && rd.getBoundary().equals(NULL_BOUNDARY)) { + return true; + } + } + return false; + }); + } + + @Cluster(initLearners = 1) + @Test(groups = "integration") + public void votersOnly() { + KVRangeId gid = cluster.genesisKVRangeId(); + String leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, gid); + cluster.split(leader, cluster.kvRangeSetting(gid).ver, gid, + copyFromUtf8("m")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 2); + KVRangeConfig r0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig r1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + AtomicReference merger; + AtomicReference mergee; + if (r0.boundary.hasEndKey() + && BoundaryUtil.compare(r0.boundary.getEndKey(), r1.boundary.getStartKey()) <= 0) { + merger = new AtomicReference<>(r0); + mergee = new AtomicReference<>(r1); + } else { + 
merger = new AtomicReference<>(r1); + mergee = new AtomicReference<>(r0); + } + while (!merger.get().leader.equals(mergee.get().leader)) { + cluster.awaitKVRangeReady(mergee.get().leader, mergee.get().id); + try { + await().ignoreExceptions().atMost(Duration.ofSeconds(5)) + .until(() -> { + cluster.transferLeader(mergee.get().leader, mergee.get().ver, mergee.get().id, + merger.get().leader) + .toCompletableFuture().join(); + return cluster.kvRangeSetting(mergee.get().id).leader.equals(merger.get().leader); + }); + break; + } catch (Throwable t) { + merger.set(cluster.kvRangeSetting(cluster.allKVRangeIds().get(0))); + mergee.set(cluster.kvRangeSetting(cluster.allKVRangeIds().get(1))); + } + } + + cluster.merge(merger.get().leader, merger.get().ver, merger.get().id, mergee.get().id) + .toCompletableFuture().join(); + + cluster.awaitKVRangeStateOnAllStores(mergee.get().id, State.StateType.Merged, 60); + } + + @Test(groups = "integration") + public void followersTimeoutThenResetBySnapshot() { + KVRangeId gid = cluster.genesisKVRangeId(); + String leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, gid); + cluster.split(leader, cluster.kvRangeSetting(gid).ver, gid, + copyFromUtf8("m")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 2); + KVRangeConfig r0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig r1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + AtomicReference merger; + AtomicReference mergee; + if (r0.boundary.hasEndKey() + && BoundaryUtil.compare(r0.boundary.getEndKey(), r1.boundary.getStartKey()) <= 0) { + merger = new AtomicReference<>(r0); + mergee = new AtomicReference<>(r1); + } else { + merger = new AtomicReference<>(r1); + mergee = new AtomicReference<>(r0); + } + + while (!merger.get().leader.equals(mergee.get().leader)) { + cluster.awaitKVRangeReady(mergee.get().leader, mergee.get().id); + try { + await().ignoreExceptions().atMost(Duration.ofSeconds(5)) + 
.until(() -> { + cluster.transferLeader(mergee.get().leader, mergee.get().ver, mergee.get().id, + merger.get().leader) + .toCompletableFuture().join(); + return cluster.kvRangeSetting(mergee.get().id).leader.equals(merger.get().leader); + }); + break; + } catch (Throwable t) { + merger.set(cluster.kvRangeSetting(cluster.allKVRangeIds().get(0))); + mergee.set(cluster.kvRangeSetting(cluster.allKVRangeIds().get(1))); + } + } + + KVRangeConfig mBefore = cluster.kvRangeSetting(merger.get().id); + KVRangeConfig meBefore = cluster.kvRangeSetting(mergee.get().id); + + try (AutoCloseable dropDMRFromFollowers = cluster.dropIf(m -> + m.getPayload().hasDataMergeRequest() + && m.getPayload().getRangeId().equals(mergee.get().id) + && !m.getFrom().equals(merger.get().leader))) { + cluster.merge(merger.get().leader, mBefore.ver, mBefore.id, meBefore.id) + .toCompletableFuture().join(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + cluster.awaitKVRangeStateOnAllStores(merger.get().id, State.StateType.Normal, 120); + + assertEquals(BoundaryUtil.combine(mBefore.boundary, meBefore.boundary), + cluster.kvRangeSetting(merger.get().id).boundary); + Boundary expected = BoundaryUtil.combine(mBefore.boundary, meBefore.boundary); + await().untilAsserted(() -> { + for (String sid : cluster.allStoreIds()) { + KVRangeDescriptor rd = cluster.getKVRange(sid, merger.get().id); + if (rd != null) { + assertEquals(rd.getState(), State.StateType.Normal); + assertEquals(rd.getBoundary(), expected); + } + } + }); + + cluster.awaitKVRangeStateOnAllStores(mergee.get().id, State.StateType.Merged, 60); + await().untilAsserted(() -> { + for (String sid : cluster.allStoreIds()) { + KVRangeDescriptor rd = cluster.getKVRange(sid, mergee.get().id); + if (rd != null && rd.getState() == State.StateType.Merged) { + assertEquals(rd.getBoundary(), NULL_BOUNDARY); + } + } + }); + } + + @Test(groups = "integration") + public void oneFollowerCompletesLeaderAndOtherReset() { + KVRangeId gid = 
cluster.genesisKVRangeId(); + String leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, gid); + cluster.split(leader, cluster.kvRangeSetting(gid).ver, gid, copyFromUtf8("m")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 2); + KVRangeConfig r0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig r1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + AtomicReference merger; + AtomicReference mergee; + if (r0.boundary.hasEndKey() + && BoundaryUtil.compare(r0.boundary.getEndKey(), r1.boundary.getStartKey()) <= 0) { + merger = new AtomicReference<>(r0); + mergee = new AtomicReference<>(r1); + } else { + merger = new AtomicReference<>(r1); + mergee = new AtomicReference<>(r0); + } + + while (!merger.get().leader.equals(mergee.get().leader)) { + cluster.awaitKVRangeReady(mergee.get().leader, mergee.get().id); + try { + await().ignoreExceptions().atMost(Duration.ofSeconds(5)) + .until(() -> { + cluster.transferLeader(mergee.get().leader, mergee.get().ver, mergee.get().id, + merger.get().leader) + .toCompletableFuture().join(); + return cluster.kvRangeSetting(mergee.get().id).leader.equals(merger.get().leader); + }); + break; + } catch (Throwable t) { + merger.set(cluster.kvRangeSetting(cluster.allKVRangeIds().get(0))); + mergee.set(cluster.kvRangeSetting(cluster.allKVRangeIds().get(1))); + } + } + + KVRangeConfig mBefore = cluster.kvRangeSetting(merger.get().id); + KVRangeConfig meBefore = cluster.kvRangeSetting(mergee.get().id); + + String mergerLeader = mBefore.leader; + String allowedFollower = followStores(mBefore).iterator().next(); + + try (AutoCloseable dropAllButOne = cluster.dropIf(m -> + m.getPayload().hasDataMergeRequest() + && m.getPayload().getRangeId().equals(mergee.get().id) + && !m.getFrom().equals(allowedFollower))) { + try { + cluster.merge(mergerLeader, mBefore.ver, mBefore.id, meBefore.id) + .toCompletableFuture().join(); + } catch (CompletionException ex) { + 
assertTrue(ex.getCause() instanceof TryLater); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + + cluster.awaitKVRangeStateOnAllStores(merger.get().id, State.StateType.Normal, 120); + + Boundary expected = BoundaryUtil.combine(mBefore.boundary, meBefore.boundary); + assertEquals(expected, cluster.kvRangeSetting(merger.get().id).boundary); + for (String sid : cluster.allStoreIds()) { + KVRangeDescriptor rd = cluster.getKVRange(sid, merger.get().id); + if (rd != null) { + assertEquals(rd.getState(), State.StateType.Normal); + assertEquals(rd.getBoundary(), expected); + } + } + + cluster.awaitKVRangeStateOnAllStores(mergee.get().id, State.StateType.Merged, 60); + for (String sid : cluster.allStoreIds()) { + KVRangeDescriptor rd = cluster.getKVRange(sid, mergee.get().id); + if (rd != null && rd.getState() == State.StateType.Merged) { + assertEquals(rd.getBoundary(), NULL_BOUNDARY); + } + } + } + + @SneakyThrows + @Cluster(mergeTimeoutSec = 1) + @Test(groups = "integration") + public void tryMigrateFailedThenCancelSeesMerged() { + KVRangeId gid = cluster.genesisKVRangeId(); + String leader = cluster.bootstrapStore(); + cluster.awaitKVRangeReady(leader, gid); + cluster.split(leader, cluster.kvRangeSetting(gid).ver, gid, copyFromUtf8("m")).toCompletableFuture().join(); + await().until(() -> cluster.allKVRangeIds().size() == 2); + + KVRangeConfig r0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig r1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + AtomicReference merger; + AtomicReference mergee; + if (r0.boundary.hasEndKey() && BoundaryUtil.compare(r0.boundary.getEndKey(), r1.boundary.getStartKey()) <= 0) { + merger = new AtomicReference<>(r0); + mergee = new AtomicReference<>(r1); + } else { + merger = new AtomicReference<>(r1); + mergee = new AtomicReference<>(r0); + } + + KVRangeConfig mBefore = cluster.kvRangeSetting(merger.get().id); + KVRangeConfig meBefore = cluster.kvRangeSetting(mergee.get().id); + String 
mergerLeader = mBefore.leader; + java.util.Iterator it = followStores(mBefore).iterator(); + String f1 = it.next(); + String f2 = it.next(); + + try (AutoCloseable dropSnapshotDataToFollowers = cluster.dropIf(m -> + m.getPayload().hasSaveSnapshotDataRequest() + && m.getPayload().getRangeId().equals(merger.get().id) + && (m.getPayload().getHostStoreId().equals(f1) || m.getPayload().getHostStoreId().equals(f2)))) { + + cluster.merge(mergerLeader, mBefore.ver, mBefore.id, meBefore.id).toCompletableFuture().join(); + + await().atMost(Duration.ofSeconds(30)).until(() -> { + KVRangeDescriptor d1 = cluster.getKVRange(f1, merger.get().id); + KVRangeDescriptor d2 = cluster.getKVRange(f2, merger.get().id); + return d1 != null && d2 != null + && d1.getState() == State.StateType.PreparedMerging + && d2.getState() == State.StateType.PreparedMerging; + }); + + KVRangeStoreTestCluster.HoldHandle holdWalToFollowers = cluster.holdIf(m -> + m.getPayload().hasWalRaftMessages() + && m.getPayload().getRangeId().equals(merger.get().id) + && (m.getPayload().getHostStoreId().equals(f1) || m.getPayload().getHostStoreId().equals(f2))); + Thread.sleep(7000); // snapshotSyncIdleTimeoutSec=5 + // release wal, so leader could commit MergeDone + holdWalToFollowers.close(); + } + + cluster.awaitKVRangeStateOnAllStores(merger.get().id, State.StateType.Normal, 120); + Boundary expected = BoundaryUtil.combine(mBefore.boundary, meBefore.boundary); + assertEquals(expected, cluster.kvRangeSetting(merger.get().id).boundary); + for (String sid : cluster.allStoreIds()) { + KVRangeDescriptor rd = cluster.getKVRange(sid, merger.get().id); + if (rd != null) { + assertEquals(rd.getState(), State.StateType.Normal); + assertEquals(rd.getBoundary(), expected); + } + } + cluster.awaitKVRangeStateOnAllStores(mergee.get().id, State.StateType.Merged, 60); + for (String sid : cluster.allStoreIds()) { + KVRangeDescriptor rd = cluster.getKVRange(sid, mergee.get().id); + if (rd != null && rd.getState() == 
State.StateType.Merged) { + assertEquals(rd.getBoundary(), NULL_BOUNDARY); + } + } + } +} diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterMergeTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterMergeTest.java index 5cc2982b7..978d5e9f6 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterMergeTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterMergeTest.java @@ -14,30 +14,32 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store; +import static com.google.protobuf.ByteString.copyFromUtf8; +import static java.util.Collections.emptySet; import static org.apache.bifromq.basekv.proto.State.StateType.Merged; import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.combine; import static org.apache.bifromq.basekv.utils.BoundaryUtil.compare; -import static com.google.protobuf.ByteString.copyFromUtf8; -import static java.util.Collections.emptySet; import static org.awaitility.Awaitility.await; -import org.apache.bifromq.basekv.annotation.Cluster; -import org.apache.bifromq.basekv.proto.KVRangeDescriptor; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import com.google.common.collect.Sets; import java.time.Duration; +import java.util.HashSet; import java.util.Objects; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; +import 
org.apache.bifromq.basekv.annotation.Cluster; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.testng.annotations.Test; @Slf4j @@ -47,7 +49,6 @@ public class KVRangeStoreClusterMergeTest extends KVRangeStoreClusterTestTemplat public void mergeSingleNodeCluster() { KVRangeId genesisKVRangeId = cluster.genesisKVRangeId(); KVRangeConfig genesisKVRangeSettings = cluster.awaitAllKVRangeReady(genesisKVRangeId, 0, 40); - log.info("Splitting range"); cluster.split(genesisKVRangeSettings.leader, genesisKVRangeSettings.ver, genesisKVRangeId, @@ -87,10 +88,6 @@ public void mergeSingleNodeCluster() { mergee.set(cluster.kvRangeSetting(cluster.allKVRangeIds().get(1))); } } - log.info("Merge KVRange[{}] to KVRange[{}] from leader store[{}]", - KVRangeIdUtil.toString(mergee.get().id), - KVRangeIdUtil.toString(merger.get().id), - merger.get().leader); cluster.awaitKVRangeReady(merger.get().leader, merger.get().id); cluster.awaitKVRangeReady(mergee.get().leader, mergee.get().id); cluster.merge(merger.get().leader, @@ -98,25 +95,11 @@ public void mergeSingleNodeCluster() { merger.get().id, mergee.get().id) .toCompletableFuture().join(); - - KVRangeConfig mergerSetting = cluster.awaitAllKVRangeReady(merger.get().id, 3, 40); - log.info("Merged settings {}", mergerSetting); - await().atMost(Duration.ofSeconds(400)).until(() -> { - for (String storeId : cluster.allStoreIds()) { - KVRangeDescriptor mergeeDesc = cluster.getKVRange(storeId, mergee.get().id); - if (mergeeDesc.getState() != Merged) { - return false; - } - } - return true; - }); - log.info("Merge done, and quit"); KVRangeConfig mergeeSetting = cluster.awaitAllKVRangeReady(mergee.get().id, 3, 40); String lastStore = mergeeSetting.leader; cluster.changeReplicaConfig(lastStore, mergeeSetting.ver, mergee.get().id, emptySet(), emptySet()) 
.toCompletableFuture().join(); await().until(() -> !cluster.isHosting(lastStore, mergee.get().id)); - log.info("Test done"); } @Test(groups = "integration") @@ -133,7 +116,6 @@ public void mergeWithLearner() { private void merge() { KVRangeId genesisKVRangeId = cluster.genesisKVRangeId(); KVRangeConfig genesisKVRangeSettings = cluster.awaitAllKVRangeReady(genesisKVRangeId, 1, 40); - log.info("Splitting range"); cluster.split(genesisKVRangeSettings.leader, genesisKVRangeSettings.ver, genesisKVRangeId, @@ -173,10 +155,6 @@ && compare(range0.boundary.getEndKey(), range1.boundary.getStartKey()) <= 0) { mergee.set(cluster.kvRangeSetting(cluster.allKVRangeIds().get(1))); } } - log.info("Merge KVRange[{}] to KVRange[{}] from leader store[{}]", - KVRangeIdUtil.toString(mergee.get().id), - KVRangeIdUtil.toString(merger.get().id), - merger.get().leader); cluster.awaitKVRangeReady(merger.get().leader, merger.get().id); cluster.awaitKVRangeReady(mergee.get().leader, mergee.get().id); cluster.merge(merger.get().leader, @@ -186,11 +164,11 @@ && compare(range0.boundary.getEndKey(), range1.boundary.getStartKey()) <= 0) { .toCompletableFuture().join(); KVRangeConfig mergerSetting = cluster.awaitAllKVRangeReady(merger.get().id, 6, 40); - log.info("Merged settings {}", mergerSetting); await().atMost(Duration.ofSeconds(400)).until(() -> { for (String storeId : cluster.allStoreIds()) { KVRangeDescriptor mergeeDesc = cluster.getKVRange(storeId, mergee.get().id); - if (mergeeDesc.getState() != Merged) { + // tolerate null as already quit; non-null must be Merged + if (mergeeDesc != null && mergeeDesc.getState() != Merged) { return false; } } @@ -216,14 +194,12 @@ && compare(range0.boundary.getEndKey(), range1.boundary.getStartKey()) <= 0) { cluster.changeReplicaConfig(lastStore, mergeeSetting.ver, mergee.get().id, emptySet(), emptySet()) .toCompletableFuture().join(); await().until(() -> !cluster.isHosting(lastStore, mergee.get().id)); - log.info("Test done"); } @Test(groups = 
"integration") public void mergeUnderOnlyQuorumAvailable() { KVRangeId genesisKVRangeId = cluster.genesisKVRangeId(); KVRangeConfig genesisKVRangeSettings = cluster.awaitAllKVRangeReady(genesisKVRangeId, 1, 40); - log.info("Splitting range"); cluster.split(genesisKVRangeSettings.leader, genesisKVRangeSettings.ver, genesisKVRangeId, @@ -271,14 +247,8 @@ && compare(range0.boundary.getEndKey(), range1.boundary.getStartKey()) <= 0) { log.info("Shutdown one store {}", followerStoreId); cluster.shutdownStore(followerStoreId); - log.info("Merge KVRange {} to {} from leader store {}", - KVRangeIdUtil.toString(mergee.get().id), - KVRangeIdUtil.toString(merger.get().id), - merger.get().leader); - cluster.merge(merger.get().leader, merger.get().ver, merger.get().id, mergee.get().id) .toCompletableFuture().join(); - KVRangeConfig mergedSettings = cluster.awaitAllKVRangeReady(merger.get().id, 3, 40); log.info("Merged settings {}", mergedSettings); await().atMost(Duration.ofSeconds(40)) @@ -286,13 +256,118 @@ && compare(range0.boundary.getEndKey(), range1.boundary.getStartKey()) <= 0) { log.info("Merge done"); } + @Test(groups = "integration") + public void crossNetworkDataMigration() { + KVRangeId genesisKVRangeId = cluster.genesisKVRangeId(); + KVRangeConfig genesisKVRangeSettings = cluster.awaitAllKVRangeReady(genesisKVRangeId, 1, 40); + // Split into two adjacent ranges + cluster.split(genesisKVRangeSettings.leader, + genesisKVRangeSettings.ver, + genesisKVRangeId, + copyFromUtf8("m")) + .toCompletableFuture().join(); + await().atMost(Duration.ofSeconds(100)).until(() -> cluster.allKVRangeIds().size() == 2); + + KVRangeConfig range0 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(0)); + KVRangeConfig range1 = cluster.kvRangeSetting(cluster.allKVRangeIds().get(1)); + AtomicReference merger; + AtomicReference mergee; + if (compare(range0.boundary, range1.boundary) <= 0) { + merger = new AtomicReference<>(range0); // left + mergee = new AtomicReference<>(range1); // right 
+ } else { + merger = new AtomicReference<>(range1); + mergee = new AtomicReference<>(range0); + } + + // Ensure cross-network migration by moving mergee's voters to a disjoint set of stores + Set mergerVoters = Set.copyOf(merger.get().clusterConfig.getVotersList()); + Set oldMergeeVoters = Set.copyOf(mergee.get().clusterConfig.getVotersList()); + int voterCount = mergerVoters.size(); + Set newMergeeVoters = Sets.newHashSet(); + while (newMergeeVoters.size() < voterCount) { + String s = cluster.addStore(); + if (!mergerVoters.contains(s)) { + newMergeeVoters.add(s); + } + } + KVRangeConfig meCfg = mergee.get(); + cluster.changeReplicaConfig(meCfg.leader, meCfg.ver, meCfg.id, newMergeeVoters, emptySet()) + .toCompletableFuture().join(); + // wait until all replicas in old stores are removed + await().ignoreExceptions().forever().until( + () -> oldMergeeVoters.stream().map(store -> cluster.getKVRange(store, meCfg.id)).allMatch(Objects::isNull)); + + // wait until all stores reflect the new voters config for mergee + await().ignoreExceptions().atMost(Duration.ofSeconds(60)).until(() -> { + for (String store : cluster.allStoreIds()) { + KVRangeDescriptor rd = cluster.getKVRange(store, meCfg.id); + if (rd == null) { + continue; + } + Set votersOnStore = new HashSet<>(rd.getConfig().getVotersList()); + if (!votersOnStore.containsAll(newMergeeVoters)) { + return false; + } + } + return true; + }); + mergee.set(cluster.kvRangeSetting(meCfg.id)); + + // Ensure mergee elects a leader among new voters before writing + await().ignoreExceptions().atMost(Duration.ofSeconds(30)) + .until(() -> newMergeeVoters.contains(cluster.kvRangeSetting(mergee.get().id).leader)); + mergee.set(cluster.kvRangeSetting(mergee.get().id)); + cluster.awaitKVRangeReady(mergee.get().leader, mergee.get().id); + + // Put some keys in mergee range (>= "m") + String[] keys = {"n1", "n2", "z"}; + for (String k : keys) { + cluster.put(mergee.get().leader, mergee.get().id, copyFromUtf8(k), copyFromUtf8("v" + 
k)); + } + + // Deterministically ensure MergeDone reaches the migrated mergee leader: + // Hold MergeDoneRequest to mergee until merger finishes combining boundaries, then release. + try (KVRangeStoreTestCluster.HoldHandle holdMergeDone = + cluster.holdIf(m -> m.getPayload().hasMergeDoneRequest() + && m.getPayload().getRangeId().equals(mergee.get().id))) { + // Trigger merge + KVRangeConfig mBefore = cluster.kvRangeSetting(merger.get().id); + KVRangeConfig meBefore = cluster.kvRangeSetting(mergee.get().id); + // Use explicit mergee voters to avoid stale config being captured in MergeRequest + cluster.mergeWithMergeeVoters(merger.get().leader, mBefore.ver, mBefore.id, meBefore.id, newMergeeVoters) + .toCompletableFuture().join(); + + // Wait for merger to return to Normal with combined boundary + await().atMost(Duration.ofSeconds(120)).until(() -> { + KVRangeConfig mNow = cluster.kvRangeSetting(merger.get().id); + return Objects.equals(mNow.boundary, combine(mBefore.boundary, meBefore.boundary)); + }); + + // Ensure mergee has a leader among new voters, then release MergeDone + await().ignoreExceptions().atMost(Duration.ofSeconds(30)) + .until(() -> newMergeeVoters.contains(cluster.kvRangeSetting(mergee.get().id).leader)); + holdMergeDone.releaseAll(); + } + + // Mergee becomes Merged across all stores + await().atMost(Duration.ofSeconds(120)).until(() -> + cluster.allStoreIds().stream().map(s -> cluster.getKVRange(s, mergee.get().id)) + .filter(Objects::nonNull) + .allMatch(desc -> desc.getState() == Merged)); + + // Keys from mergee should be readable via merger + for (String k : keys) { + var val = cluster.get(merger.get().leader, merger.get().id, copyFromUtf8(k)); + assert val.isPresent(); + } + } @Cluster(installSnapshotTimeoutTick = 10) @Test(groups = "integration") public void mergeWithOneMemberIsolated() { KVRangeId genesisKVRangeId = cluster.genesisKVRangeId(); KVRangeConfig genesisKVRangeSettings = cluster.awaitAllKVRangeReady(genesisKVRangeId, 1, 40); - 
log.info("Splitting range"); cluster.split(genesisKVRangeSettings.leader, genesisKVRangeSettings.ver, genesisKVRangeId, @@ -358,7 +433,8 @@ public void mergeWithOneMemberIsolated() { await().atMost(Duration.ofSeconds(400)).until(() -> { for (String storeId : cluster.allStoreIds()) { KVRangeDescriptor mergeeDesc = cluster.getKVRange(storeId, mergee.get().id); - if (mergeeDesc.getState() != Merged) { + // tolerate null as already quit; non-null must be Merged + if (mergeeDesc != null && mergeeDesc.getState() != Merged) { return false; } } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterTestTemplate.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterTestTemplate.java index e4940fdea..05c562e0e 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterTestTemplate.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreClusterTestTemplate.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store; @@ -22,20 +22,21 @@ import static org.awaitility.Awaitility.await; import static org.testng.Assert.fail; -import org.apache.bifromq.basekv.MockableTest; -import org.apache.bifromq.basekv.annotation.Cluster; -import org.apache.bifromq.basekv.proto.KVRangeDescriptor; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.option.KVRangeOptions; -import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; -import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import com.google.common.base.Preconditions; import com.google.common.collect.Sets; +import io.reactivex.rxjava3.plugins.RxJavaPlugins; import java.lang.reflect.Method; import java.util.HashSet; import java.util.Set; import java.util.concurrent.TimeUnit; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.MockableTest; +import org.apache.bifromq.basekv.annotation.Cluster; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.store.option.KVRangeOptions; +import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; @Slf4j public abstract class KVRangeStoreClusterTestTemplate extends MockableTest { @@ -56,16 +57,26 @@ private void createClusterByAnnotation(Method testMethod) { rangeOptions.setWalRaftConfig(rangeOptions.getWalRaftConfig().setAsyncAppend(cluster.asyncAppend())); rangeOptions.setWalRaftConfig(rangeOptions.getWalRaftConfig() .setInstallSnapshotTimeoutTick(cluster.installSnapshotTimeoutTick())); + // Deterministic timeouts configurable via annotation + rangeOptions.setSnapshotSyncIdleTimeoutSec(cluster.snapshotSyncIdleTimeoutSec()); + rangeOptions.setZombieTimeoutSec(cluster.zombieTimeoutSec()); + rangeOptions.setMergeTimeoutSec(cluster.mergeTimeoutSec()); options.setKvRangeOptions(rangeOptions); } else { initVoters = 3; initLearners = 0; + KVRangeOptions 
rangeOptions = options.getKvRangeOptions(); + // Speed up snapshot/data-merge idle timeout for deterministic tests + rangeOptions.setSnapshotSyncIdleTimeoutSec(5); + rangeOptions.setZombieTimeoutSec(5); + rangeOptions.setMergeTimeoutSec(5); } } @Override protected void doSetup(Method method) { try { + RxJavaPlugins.setErrorHandler(e -> log.debug("Unhandled exception", e)); createClusterByAnnotation(method); log.info("Starting test cluster"); cluster = new KVRangeStoreTestCluster(options); @@ -128,6 +139,7 @@ protected void doTearDown(Method method) { KVRangeStoreTestCluster lastCluster = this.cluster; lastCluster.shutdown(); } + RxJavaPlugins.setErrorHandler(null); } public String nonLeaderStore(KVRangeConfig setting) { diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreTest.java index 5fc12ba37..0bcbfd8fa 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreTest.java @@ -14,48 +14,31 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store; +import static com.google.protobuf.ByteString.copyFromUtf8; +import static java.util.Collections.emptyMap; +import static java.util.Collections.emptySet; +import static org.apache.bifromq.basekv.localengine.StructUtil.toValue; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; import static org.apache.bifromq.basekv.proto.State.StateType.Merged; import static org.apache.bifromq.basekv.proto.State.StateType.Normal; import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.apache.bifromq.basekv.utils.BoundaryUtil.NULL_BOUNDARY; import static org.apache.bifromq.basekv.utils.BoundaryUtil.combine; -import static com.google.protobuf.ByteString.copyFromUtf8; -import static java.util.Collections.emptyMap; -import static java.util.Collections.emptySet; import static org.awaitility.Awaitility.await; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; -import org.apache.bifromq.baseenv.EnvProvider; -import org.apache.bifromq.basekv.MockableTest; -import org.apache.bifromq.basekv.TestCoProcFactory; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngineConfigurator; -import org.apache.bifromq.basekv.proto.EnsureRange; -import org.apache.bifromq.basekv.proto.KVRangeDescriptor; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.proto.KVRangeMessage; -import org.apache.bifromq.basekv.proto.KVRangeSnapshot; -import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; -import org.apache.bifromq.basekv.proto.State; -import org.apache.bifromq.basekv.proto.StoreMessage; -import 
org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; -import org.apache.bifromq.basekv.raft.proto.Snapshot; -import org.apache.bifromq.basekv.store.exception.KVRangeException; -import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; -import org.apache.bifromq.basekv.store.proto.ROCoProcInput; -import org.apache.bifromq.basekv.store.proto.RWCoProcInput; -import org.apache.bifromq.basekv.store.util.VerUtil; -import org.apache.bifromq.basekv.utils.KVRangeIdUtil; +import com.google.common.collect.Sets; import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.subjects.PublishSubject; import java.io.File; @@ -76,6 +59,26 @@ import java.util.concurrent.TimeUnit; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.baseenv.EnvProvider; +import org.apache.bifromq.basekv.MockableTest; +import org.apache.bifromq.basekv.TestCoProcFactory; +import org.apache.bifromq.basekv.proto.EnsureRange; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.KVRangeMessage; +import org.apache.bifromq.basekv.proto.KVRangeSnapshot; +import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.proto.State; +import org.apache.bifromq.basekv.proto.StoreMessage; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; +import org.apache.bifromq.basekv.raft.proto.Snapshot; +import org.apache.bifromq.basekv.store.exception.KVRangeException; +import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; +import org.apache.bifromq.basekv.store.proto.ROCoProcInput; +import org.apache.bifromq.basekv.store.proto.RWCoProcInput; +import org.apache.bifromq.basekv.store.util.VerUtil; +import 
org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.testng.annotations.Test; @Slf4j @@ -104,12 +107,18 @@ protected void doSetup(Method method) { EnvProvider.INSTANCE.newThreadFactory("bg-task-executor")); dbRootDir = Files.createTempDirectory(""); - (((RocksDBCPableKVEngineConfigurator) options.getDataEngineConfigurator())) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME) - .toString()) - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME).toString()); - ((RocksDBWALableKVEngineConfigurator) options.getWalEngineConfigurator()) - .dbRootDir(Paths.get(dbRootDir.toString(), DB_WAL_NAME).toString()); + Struct dataConf = options.getDataEngineConf().toBuilder() + .putFields(DB_CHECKPOINT_ROOT_DIR, + toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME).toString())) + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_NAME).toString())) + .build(); + options.setDataEngineType(options.getDataEngineType()); + options.setDataEngineConf(dataConf); + Struct walConf = options.getWalEngineConf().toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_WAL_NAME).toString())) + .build(); + options.setWalEngineType(options.getWalEngineType()); + options.setWalEngineConf(walConf); rangeStore = new KVRangeStore("testCluster", @@ -476,12 +485,13 @@ public void testMerge() { } ); log.info("{}", storeDescriptor); - KVRangeDescriptor merger = storeDescriptor.getRangesList().get(0).getId().equals(id) ? - storeDescriptor.getRangesList().get(0) : storeDescriptor.getRangesList().get(1); - KVRangeDescriptor mergee = storeDescriptor.getRangesList().get(1).getId().equals(id) ? - storeDescriptor.getRangesList().get(0) : storeDescriptor.getRangesList().get(1); + KVRangeDescriptor merger = storeDescriptor.getRangesList().get(0).getId().equals(id) + ? 
storeDescriptor.getRangesList().get(0) : storeDescriptor.getRangesList().get(1); + KVRangeDescriptor mergee = storeDescriptor.getRangesList().get(1).getId().equals(id) + ? storeDescriptor.getRangesList().get(0) : storeDescriptor.getRangesList().get(1); log.info("Start Merging"); - rangeStore.merge(merger.getVer(), merger.getId(), mergee.getId()).toCompletableFuture().join(); + rangeStore.merge(merger.getVer(), merger.getId(), mergee.getId(), + Sets.newHashSet(mergee.getConfig().getVotersList())).toCompletableFuture().join(); KVRangeDescriptor mergeeDesc = await().atMost(Duration.ofSeconds(10000)).until(() -> rangeStore.describe() .flatMap(sd -> Observable.fromIterable(sd.getRangesList())) diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreTestCluster.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreTestCluster.java index e32ece77e..453c98992 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreTestCluster.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/KVRangeStoreTestCluster.java @@ -14,38 +14,25 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static java.util.Collections.emptyMap; import static java.util.Collections.emptySet; +import static org.apache.bifromq.basekv.localengine.StructUtil.toValue; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.awaitility.Awaitility.await; -import org.apache.bifromq.baseenv.EnvProvider; -import org.apache.bifromq.basekv.TestCoProcFactory; -import org.apache.bifromq.basekv.TestUtil; -import org.apache.bifromq.basekv.localengine.memory.InMemKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngineConfigurator; -import org.apache.bifromq.basekv.proto.KVRangeDescriptor; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; -import org.apache.bifromq.basekv.proto.State; -import org.apache.bifromq.basekv.proto.StoreMessage; -import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; -import org.apache.bifromq.basekv.store.exception.KVRangeException; -import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; -import org.apache.bifromq.basekv.store.proto.ROCoProcInput; -import org.apache.bifromq.basekv.store.proto.RWCoProcInput; -import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; import io.reactivex.rxjava3.core.Observable; import 
io.reactivex.rxjava3.disposables.CompositeDisposable; import io.reactivex.rxjava3.schedulers.Schedulers; @@ -67,9 +54,25 @@ import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.function.Predicate; import java.util.stream.Collectors; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.baseenv.EnvProvider; +import org.apache.bifromq.basekv.TestCoProcFactory; +import org.apache.bifromq.basekv.TestUtil; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.proto.State; +import org.apache.bifromq.basekv.proto.StoreMessage; +import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; +import org.apache.bifromq.basekv.store.exception.KVRangeException; +import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; +import org.apache.bifromq.basekv.store.proto.ROCoProcInput; +import org.apache.bifromq.basekv.store.proto.RWCoProcInput; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; @Slf4j public class KVRangeStoreTestCluster { @@ -94,6 +97,11 @@ public class KVRangeStoreTestCluster { private final ScheduledExecutorService bgTaskExecutor = new ScheduledThreadPoolExecutor(1, EnvProvider.INSTANCE.newThreadFactory("bg-task-executor")); + private final List networkRules = Lists.newCopyOnWriteArrayList(); + private final List injectRules = Lists.newCopyOnWriteArrayList(); + private final List captureRules = Lists.newCopyOnWriteArrayList(); + private final List holdRules = Lists.newCopyOnWriteArrayList(); + private final Path dbRootDir; @SneakyThrows @@ -142,6 +150,11 @@ public List allStoreIds() { return Lists.newArrayList(rangeStoreMap.keySet()); } + public String leaderOf(KVRangeId kvRangeId) { + checkKVRangeId(kvRangeId); + return 
rangeConfigMap.get(kvRangeId).leader; + } + public boolean hasKVRange(String storeId, KVRangeId kvRangeId) { checkStore(storeId); return storeDescriptorMap.getOrDefault(storeId, KVRangeStoreDescriptor.getDefaultInstance()) @@ -205,6 +218,29 @@ public KVRangeConfig awaitAllKVRangeReady(KVRangeId kvRangeId, long atLeastVer, return rangeConfigMap.get(kvRangeId); } + public void awaitKVRangeStateOnAllStores(KVRangeId kvRangeId, State.StateType state, long timeoutInSeconds) { + await().atMost(Duration.ofSeconds(timeoutInSeconds)).until(() -> { + boolean exists = false; + for (KVRangeStoreDescriptor sd : storeDescriptorMap.values()) { + for (KVRangeDescriptor rd : sd.getRangesList()) { + if (rd.getId().equals(kvRangeId)) { + exists = true; + if (rd.getState() != state) { + return false; + } + } + } + } + return exists; + }); + } + + public void awaitRangeAbsentAcrossStores(KVRangeId kvRangeId, long timeoutInSeconds) { + await().atMost(Duration.ofSeconds(timeoutInSeconds)).until(() -> + storeDescriptorMap.values().stream().noneMatch(sd -> + sd.getRangesList().stream().anyMatch(rd -> rd.getId().equals(kvRangeId)))); + } + public CompletionStage transferLeader(String storeId, long ver, KVRangeId kvRangeId, String newLeader) { checkStore(storeId); return rangeStoreMap.get(storeId).transferLeadership(ver, kvRangeId, newLeader); @@ -216,6 +252,13 @@ public CompletionStage changeReplicaConfig(String storeId, long ver, KVRan return rangeStoreMap.get(storeId).changeReplicaConfig(ver, kvRangeId, newVoters, newLearners); } + public CompletionStage purgeRange(KVRangeId kvRangeId) { + checkKVRangeId(kvRangeId); + KVRangeConfig cfg = rangeConfigMap.get(kvRangeId); + return changeReplicaConfig(cfg.leader, cfg.ver, kvRangeId, emptySet(), emptySet()) + .thenRun(() -> awaitRangeAbsentAcrossStores(kvRangeId, 60)); + } + public void cut(String fromStoreId, String toStoreId) { cutMap.computeIfAbsent(fromStoreId, k -> Sets.newConcurrentHashSet()).add(toStoreId); } @@ -247,7 +290,29 @@ public 
CompletionStage split(String storeId, long ver, KVRangeId kvRangeId public CompletionStage merge(String storeId, long ver, KVRangeId mergerId, KVRangeId mergeeId) { checkStore(storeId); - return rangeStoreMap.get(storeId).merge(ver, mergerId, mergeeId); + // Prefer voters from cached leader settings; fallback to any descriptor; else empty (absent mergee fast path) + Set voters; + KVRangeConfig mergeeCfg = rangeConfigMap.get(mergeeId); + if (mergeeCfg != null) { + voters = Sets.newHashSet(mergeeCfg.clusterConfig.getVotersList()); + } else { + voters = storeDescriptorMap.values().stream() + .flatMap(sd -> sd.getRangesList().stream()) + .filter(rd -> rd.getId().equals(mergeeId)) + .findFirst() + .map(rd -> Sets.newHashSet(rd.getConfig().getVotersList())) + .orElseGet(Sets::newHashSet); + } + return rangeStoreMap.get(storeId).merge(ver, mergerId, mergeeId, voters); + } + + public CompletionStage mergeWithMergeeVoters(String storeId, + long ver, + KVRangeId mergerId, + KVRangeId mergeeId, + Set explicitMergeeVoters) { + checkStore(storeId); + return rangeStoreMap.get(storeId).merge(ver, mergerId, mergeeId, explicitMergeeVoters); } public boolean exist(String storeId, KVRangeId kvRangeId, ByteString key) { @@ -406,20 +471,18 @@ public void shutdown() { private String buildStore(boolean isBootstrap) { String uuid = UUID.randomUUID().toString(); KVRangeStoreOptions options = optionsTpl.toBuilder().build(); - if (options.getDataEngineConfigurator() instanceof RocksDBCPableKVEngineConfigurator) { - options.setDataEngineConfigurator(((RocksDBCPableKVEngineConfigurator) options.getDataEngineConfigurator()) - .toBuilder() - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME, uuid).toString()) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME, uuid) - .toString()) - .build()); - } - if (options.getWalEngineConfigurator() instanceof RocksDBWALableKVEngineConfigurator) { - options.setWalEngineConfigurator(((RocksDBWALableKVEngineConfigurator) options - 
.getWalEngineConfigurator()).toBuilder() - .dbRootDir(Paths.get(dbRootDir.toString(), DB_WAL_NAME, uuid).toString()) - .build()); - } + Struct dataConf = options.getDataEngineConf().toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_NAME, uuid).toString())) + .putFields(DB_CHECKPOINT_ROOT_DIR, + toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME, uuid).toString())) + .build(); + options.setDataEngineType(options.getDataEngineType()); + options.setDataEngineConf(dataConf); + Struct walConf = options.getWalEngineConf().toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_WAL_NAME, uuid).toString())) + .build(); + options.setWalEngineType(options.getWalEngineType()); + options.setWalEngineConf(walConf); KVRangeStore store = initStore(options); if (isBootstrap) { store.bootstrap(KVRangeIdUtil.generate(), FULL_BOUNDARY).join(); @@ -431,23 +494,21 @@ private String buildStore(boolean isBootstrap) { private void loadStore(String storeId) { String uuid = storePathMap.get(storeId); KVRangeStoreOptions options = optionsTpl.toBuilder().build(); - if (options.getWalEngineConfigurator() instanceof InMemKVEngineConfigurator) { + if ("memory".equalsIgnoreCase(options.getWalEngineType())) { options.setOverrideIdentity(storeId); } - if (options.getDataEngineConfigurator() instanceof RocksDBCPableKVEngineConfigurator) { - options.setDataEngineConfigurator(((RocksDBCPableKVEngineConfigurator) options.getDataEngineConfigurator()) - .toBuilder() - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME, uuid).toString()) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME, uuid).toString()) - .build()); - } - if (options.getWalEngineConfigurator() instanceof RocksDBWALableKVEngineConfigurator) { - options.setWalEngineConfigurator(((RocksDBWALableKVEngineConfigurator) options - .getWalEngineConfigurator()) - .toBuilder() - .dbRootDir(Paths.get(dbRootDir.toString(), DB_WAL_NAME, uuid).toString()) - 
.build()); - } + Struct dataConf = options.getDataEngineConf().toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_NAME, uuid).toString())) + .putFields(DB_CHECKPOINT_ROOT_DIR, + toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME, uuid).toString())) + .build(); + options.setDataEngineType(options.getDataEngineType()); + options.setDataEngineConf(dataConf); + Struct walConf = options.getWalEngineConf().toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_WAL_NAME, uuid).toString())) + .build(); + options.setWalEngineType(options.getWalEngineType()); + options.setWalEngineConf(walConf); initStore(options); } @@ -465,22 +526,7 @@ private KVRangeStore initStore(KVRangeStoreOptions options) { store.start(new IStoreMessenger() { @Override public void send(StoreMessage message) { - if (message.getPayload().hasHostStoreId()) { - if (rangeStoreMsgSourceMap.containsKey(message.getPayload().getHostStoreId()) && - !cutMap.getOrDefault(message.getFrom(), emptySet()) - .contains(message.getPayload().getHostStoreId())) { - rangeStoreMsgSourceMap.get(message.getPayload().getHostStoreId()).onNext(message); - } - } else { - // broadcast - rangeStoreMsgSourceMap.forEach((storeId, msgSubject) -> - msgSubject.onNext(message.toBuilder() - .setPayload(message.getPayload().toBuilder() - // fill the target store - .setHostStoreId(storeId) - .build()) - .build())); - } + deliver(message); } @Override @@ -499,6 +545,143 @@ public void close() { return store; } + private void deliver(StoreMessage message) { + // hold first for deterministic delayed delivery + for (HoldRule r : holdRules) { + if (r.predicate.test(message)) { + r.buffer.add(message); + return; + } + } + // capture first + captureRules.forEach(r -> { + if (!r.future.isDone() && r.predicate.test(message)) { + r.future.complete(message); + if (r.oneShot) { + captureRules.remove(r); + } + } + }); + // inject once if triggered + injectRules.forEach(r -> { + if 
(r.predicate.test(message)) { + injectRules.remove(r); + StoreMessage injected = r.factory.apply(message); + if (injected != null) { + deliver(injected); + } + } + }); + if (message.getPayload().hasHostStoreId()) { + if (shouldDrop(message)) { + return; + } + long delayMs = delayMs(message); + Runnable sendTask = () -> { + if (rangeStoreMsgSourceMap.containsKey(message.getPayload().getHostStoreId()) + && !cutMap.getOrDefault(message.getFrom(), emptySet()) + .contains(message.getPayload().getHostStoreId())) { + rangeStoreMsgSourceMap.get(message.getPayload().getHostStoreId()).onNext(message); + } + }; + if (delayMs > 0) { + bgTaskExecutor.schedule(sendTask, delayMs, TimeUnit.MILLISECONDS); + } else { + sendTask.run(); + } + } else { + rangeStoreMsgSourceMap.forEach((sid, msgSubject) -> { + StoreMessage targetMsg = message.toBuilder() + .setPayload(message.getPayload().toBuilder() + .setHostStoreId(sid) + .build()) + .build(); + if (shouldDrop(targetMsg)) { + return; + } + long delayMs = delayMs(targetMsg); + Runnable sendTask = () -> msgSubject.onNext(targetMsg); + if (delayMs > 0) { + bgTaskExecutor.schedule(sendTask, delayMs, TimeUnit.MILLISECONDS); + } else { + sendTask.run(); + } + }); + } + } + + public AutoCloseable dropIf(Predicate predicate) { + NetworkRule rule = NetworkRule.drop(predicate, false); + networkRules.add(rule); + return () -> networkRules.remove(rule); + } + + public AutoCloseable dropOnceIf(Predicate predicate) { + NetworkRule rule = NetworkRule.drop(predicate, true); + networkRules.add(rule); + return () -> networkRules.remove(rule); + } + + public AutoCloseable delayIf(Predicate predicate, long delayMs) { + NetworkRule rule = NetworkRule.delay(predicate, delayMs, false); + networkRules.add(rule); + return () -> networkRules.remove(rule); + } + + public AutoCloseable delayOnceIf(Predicate predicate, long delayMs) { + NetworkRule rule = NetworkRule.delay(predicate, delayMs, true); + networkRules.add(rule); + return () -> 
networkRules.remove(rule); + } + + public void clearNetworkRules() { + networkRules.clear(); + } + + public HoldHandle holdIf(Predicate predicate) { + HoldRule rule = new HoldRule(predicate); + holdRules.add(rule); + return new HoldHandle(rule); + } + + public AutoCloseable injectOnceIf(Predicate trigger, + Function factory) { + InjectRule r = new InjectRule(trigger, factory, true); + injectRules.add(r); + return () -> injectRules.remove(r); + } + + public CompletableFuture captureOnce(Predicate predicate) { + CompletableFuture fut = new CompletableFuture<>(); + CaptureRule r = new CaptureRule(predicate, fut, true); + captureRules.add(r); + return fut; + } + + private boolean shouldDrop(StoreMessage m) { + for (NetworkRule r : networkRules) { + if (r.action == NetworkRule.Action.DROP && r.predicate.test(m)) { + if (r.oneShot) { + networkRules.remove(r); + } + return true; + } + } + return false; + } + + private long delayMs(StoreMessage m) { + for (NetworkRule r : networkRules) { + if (r.action == NetworkRule.Action.DELAY && r.predicate.test(m)) { + if (r.oneShot) { + networkRules.remove(r); + } + return r.delayMs; + } + } + return 0L; + } + private void handleStoreDescriptor(KVRangeStoreDescriptor storeDescriptor) { storeDescriptorMap.put(storeDescriptor.getId(), storeDescriptor); storeDescriptor.getRangesList().forEach(rangeDescriptor -> { @@ -539,4 +722,101 @@ private boolean shouldRetry(Throwable e) { private long reqId() { return System.nanoTime(); } + + private static class NetworkRule { + final Predicate predicate; + final Action action; + final long delayMs; + final boolean oneShot; + + private NetworkRule(Predicate predicate, Action action, long delayMs, + boolean oneShot) { + this.predicate = predicate; + this.action = action; + this.delayMs = delayMs; + this.oneShot = oneShot; + } + + static NetworkRule drop(Predicate predicate, boolean oneShot) { + return new NetworkRule(predicate, Action.DROP, 0L, oneShot); + } + + static NetworkRule delay(Predicate 
predicate, long delayMs, boolean oneShot) { + return new NetworkRule(predicate, Action.DELAY, delayMs, oneShot); + } + + enum Action { DROP, DELAY } + } + + private static class InjectRule { + final java.util.function.Predicate predicate; + final java.util.function.Function factory; + final boolean oneShot; + + InjectRule(java.util.function.Predicate predicate, + java.util.function.Function factory, + boolean oneShot) { + this.predicate = predicate; + this.factory = factory; + this.oneShot = oneShot; + } + } + + private static class CaptureRule { + final java.util.function.Predicate predicate; + final java.util.concurrent.CompletableFuture future; + final boolean oneShot; + + CaptureRule(java.util.function.Predicate predicate, + java.util.concurrent.CompletableFuture future, + boolean oneShot) { + this.predicate = predicate; + this.future = future; + this.oneShot = oneShot; + } + } + + private static class HoldRule { + final java.util.function.Predicate predicate; + final java.util.concurrent.ConcurrentLinkedQueue buffer = + new java.util.concurrent.ConcurrentLinkedQueue<>(); + + HoldRule(java.util.function.Predicate predicate) { + this.predicate = predicate; + } + } + + public final class HoldHandle implements AutoCloseable { + private final HoldRule rule; + + private HoldHandle(HoldRule rule) { + this.rule = rule; + } + + public void releaseOne() { + StoreMessage msg = rule.buffer.poll(); + if (msg != null) { + // temporarily remove rule to avoid re-hold + holdRules.remove(rule); + try { + deliver(msg); + } finally { + holdRules.add(rule); + } + } + } + + public void releaseAll() { + holdRules.remove(rule); + StoreMessage msg; + while ((msg = rule.buffer.poll()) != null) { + deliver(msg); + } + } + + @Override + public void close() { + releaseAll(); + } + } } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/AbstractKVRangeTest.java 
b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/AbstractKVRangeTest.java index 35cbf1916..459379fb1 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/AbstractKVRangeTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/AbstractKVRangeTest.java @@ -14,46 +14,51 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.range; -import org.apache.bifromq.basekv.MockableTest; -import org.apache.bifromq.basekv.TestUtil; -import org.apache.bifromq.basekv.localengine.ICPableKVSpace; -import org.apache.bifromq.basekv.localengine.IKVEngine; -import org.apache.bifromq.basekv.localengine.KVEngineFactory; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; +import static org.apache.bifromq.basekv.localengine.StructUtil.toValue; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; + +import com.google.protobuf.Struct; import java.lang.reflect.Method; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import lombok.SneakyThrows; +import org.apache.bifromq.basekv.MockableTest; +import org.apache.bifromq.basekv.TestUtil; +import org.apache.bifromq.basekv.localengine.ICPableKVSpace; +import org.apache.bifromq.basekv.localengine.IKVEngine; +import org.apache.bifromq.basekv.localengine.KVEngineFactory; +import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs; public abstract class AbstractKVRangeTest extends MockableTest { public Path dbRootDir; + protected IKVEngine kvEngine; private String DB_NAME = "testDB"; private String 
DB_CHECKPOINT_DIR_NAME = "testDB_cp"; - private RocksDBCPableKVEngineConfigurator configurator = null; - protected IKVEngine kvEngine; + private Struct conf = null; @SneakyThrows protected void doSetup(Method method) { dbRootDir = Files.createTempDirectory(""); - configurator = RocksDBCPableKVEngineConfigurator.builder() - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME).toString()) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME) - .toString()) + conf = RocksDBDefaultConfigs.CP.toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_NAME).toString())) + .putFields(DB_CHECKPOINT_ROOT_DIR, + toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME).toString())) .build(); - kvEngine = KVEngineFactory.createCPable(null, configurator); + kvEngine = KVEngineFactory.createCPable(null, "rocksdb", conf); kvEngine.start(); } protected void doTearDown(Method method) { kvEngine.stop(); - if (configurator != null) { + if (conf != null) { TestUtil.deleteDir(dbRootDir.toString()); dbRootDir.toFile().delete(); } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/AdaptiveWriteBudgetTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/AdaptiveWriteBudgetTest.java deleted file mode 100644 index 80c4d8da6..000000000 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/AdaptiveWriteBudgetTest.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.basekv.store.range; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; - -import org.testng.annotations.Test; - -public class AdaptiveWriteBudgetTest { - @Test - public void shouldRespectDefaultMaxLimits() { - AdaptiveWriteBudget budget = new AdaptiveWriteBudget(); - - for (int i = 0; i < 20; i++) { - budget.recordFlush(1_000_000, 10_000_000, 1); - } - - assertEquals(budget.currentEntryLimit(), 128_000); - assertEquals(budget.currentByteLimit(), 256L * 1024 * 1024); - } - - @Test - public void shouldGrowWhenFlushIsFast() { - AdaptiveWriteBudget budget = new AdaptiveWriteBudget(); - long initialEntryLimit = budget.currentEntryLimit(); - long initialByteLimit = budget.currentByteLimit(); - - budget.recordFlush(initialEntryLimit, initialByteLimit, 10); - - assertTrue(budget.currentEntryLimit() > initialEntryLimit); - assertTrue(budget.currentByteLimit() > initialByteLimit); - } - - @Test - public void shouldShrinkWhenFlushIsSlow() { - AdaptiveWriteBudget budget = new AdaptiveWriteBudget(); - long initialEntryLimit = budget.currentEntryLimit(); - long initialByteLimit = budget.currentByteLimit(); - - budget.recordFlush(initialEntryLimit * 2, initialByteLimit * 2, 10); - long increasedEntryLimit = budget.currentEntryLimit(); - long increasedByteLimit = budget.currentByteLimit(); - assertTrue(increasedEntryLimit > initialEntryLimit); - assertTrue(increasedByteLimit > initialByteLimit); - - budget.recordFlush(increasedEntryLimit, increasedByteLimit, 200); - - 
assertTrue(budget.currentEntryLimit() < increasedEntryLimit); - assertTrue(budget.currentByteLimit() < increasedByteLimit); - } - - @Test - public void shouldFlushWhenBudgetReached() { - AdaptiveWriteBudget budget = new AdaptiveWriteBudget(); - long entryLimit = budget.currentEntryLimit(); - long byteLimit = budget.currentByteLimit(); - - assertTrue(budget.shouldFlush(entryLimit, 0)); - assertTrue(budget.shouldFlush(0, byteLimit)); - } -} diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeDumpSessionTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeDumpSessionTest.java index 3d05e7728..8a5cbc852 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeDumpSessionTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeDumpSessionTest.java @@ -44,6 +44,8 @@ import org.apache.bifromq.basekv.proto.SaveSnapshotDataReply; import org.apache.bifromq.basekv.proto.SaveSnapshotDataRequest; import org.apache.bifromq.basekv.proto.SnapshotSyncRequest; +import org.apache.bifromq.basekv.store.api.IKVIterator; +import org.apache.bifromq.basekv.store.api.IKVRangeReader; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.mockito.ArgumentCaptor; import org.mockito.Mock; @@ -56,11 +58,9 @@ public class KVRangeDumpSessionTest extends MockableTest { @Mock private IKVRangeMessenger messenger; @Mock - private IKVRangeCheckpointReader rangeCPReader; + private IKVRangeReader checkpointReader; @Mock - private IKVCheckpointReader rangeCPDataReader; - @Mock - private IKVCheckpointIterator rangeCPDataItr; + private IKVIterator checkpointItr; @Mock private KVRangeDumpSession.DumpBytesRecorder dumpBytesRecorder; @@ -70,15 +70,12 @@ public void dumpEmptySnapshot() { String peerStoreId = "follower"; String sessionId = "session"; KVRangeId rangeId = KVRangeIdUtil.generate(); - SnapshotSyncRequest 
request = SnapshotSyncRequest.newBuilder() - .setSessionId(sessionId) - .setSnapshot(KVRangeSnapshot.newBuilder() - .setId(rangeId) - .build()) + KVRangeSnapshot snapshot = KVRangeSnapshot.newBuilder() + .setId(rangeId) .build(); when(rangeAccessor.id()).thenReturn(rangeId); - KVRangeDumpSession dumpSession = new KVRangeDumpSession(peerStoreId, request, rangeAccessor, messenger, - Duration.ofSeconds(5), 1024, new SnapshotBandwidthGovernor(0), dumpBytesRecorder); + KVRangeDumpSession dumpSession = new KVRangeDumpSession(sessionId, snapshot, rangeId, peerStoreId, + rangeAccessor, messenger, Duration.ofSeconds(5), 1024, new SnapshotBandwidthGovernor(0), dumpBytesRecorder); await().until(() -> dumpSession.awaitDone().toCompletableFuture().isDone()); ArgumentCaptor messageCap = ArgumentCaptor.forClass(KVRangeMessage.class); verify(messenger).send(messageCap.capture()); @@ -104,8 +101,10 @@ public void dumpNonExistSnapshot() { .build(); when(rangeAccessor.id()).thenReturn(rangeId); when(rangeAccessor.hasCheckpoint(snapshot)).thenReturn(false); - KVRangeDumpSession dumpSession = new KVRangeDumpSession(peerStoreId, request, rangeAccessor, messenger, - Duration.ofSeconds(5), 1024, new SnapshotBandwidthGovernor(0), dumpBytesRecorder); + KVRangeDumpSession dumpSession = new KVRangeDumpSession(sessionId, snapshot, rangeId, peerStoreId, + rangeAccessor, + messenger, Duration.ofSeconds(5), 1024, new SnapshotBandwidthGovernor(0), dumpBytesRecorder); + await().until(() -> dumpSession.awaitDone().toCompletableFuture().isDone()); ArgumentCaptor messageCap = ArgumentCaptor.forClass(KVRangeMessage.class); verify(messenger).send(messageCap.capture()); @@ -113,7 +112,7 @@ public void dumpNonExistSnapshot() { assertEquals(message.getHostStoreId(), peerStoreId); assertEquals(message.getRangeId(), rangeId); assertEquals(message.getSaveSnapshotDataRequest().getSessionId(), sessionId); - assertEquals(message.getSaveSnapshotDataRequest().getFlag(), SaveSnapshotDataRequest.Flag.Error); + 
assertEquals(message.getSaveSnapshotDataRequest().getFlag(), SaveSnapshotDataRequest.Flag.NotFound); } @Test @@ -149,19 +148,18 @@ private void sessionEndWithFlag(SaveSnapshotDataReply.Result flag) { when(rangeAccessor.id()).thenReturn(rangeId); when(rangeAccessor.hasCheckpoint(snapshot)).thenReturn(true); - when(rangeAccessor.open(snapshot)).thenReturn(rangeCPReader); - when(rangeCPReader.newDataReader()).thenReturn(rangeCPDataReader); - when(rangeCPDataReader.iterator()).thenReturn(rangeCPDataItr); + when(rangeAccessor.open(snapshot)).thenReturn(checkpointReader); + when(checkpointReader.iterator()).thenReturn(checkpointItr); when(messenger.receive()).thenReturn(incomingMsgs); - when(rangeCPDataItr.isValid()).thenReturn(true, false); - when(rangeCPDataItr.key()).thenReturn(ByteString.copyFromUtf8("key")); - when(rangeCPDataItr.value()).thenReturn(ByteString.copyFromUtf8("value")); - KVRangeDumpSession dumpSession = new KVRangeDumpSession(peerStoreId, request, rangeAccessor, messenger, - Duration.ofSeconds(5), 1024, new SnapshotBandwidthGovernor(0), dumpBytesRecorder); + when(checkpointItr.isValid()).thenReturn(true, false); + when(checkpointItr.key()).thenReturn(ByteString.copyFromUtf8("key")); + when(checkpointItr.value()).thenReturn(ByteString.copyFromUtf8("value")); + KVRangeDumpSession dumpSession = new KVRangeDumpSession(sessionId, snapshot, rangeId, peerStoreId, + rangeAccessor, messenger, Duration.ofSeconds(5), 1024, new SnapshotBandwidthGovernor(0), dumpBytesRecorder); assertEquals(dumpSession.checkpointId(), checkpointId); - verify(rangeCPDataItr, timeout(100)).seekToFirst(); - verify(rangeCPDataItr, timeout(100)).next(); + verify(checkpointItr, timeout(100)).seekToFirst(); + verify(checkpointItr, timeout(100)).next(); assertFalse(dumpSession.awaitDone().toCompletableFuture().isDone()); ArgumentCaptor messageCap = ArgumentCaptor.forClass(KVRangeMessage.class); verify(messenger).send(messageCap.capture()); @@ -177,7 +175,8 @@ private void 
sessionEndWithFlag(SaveSnapshotDataReply.Result flag) { .build()); verify(dumpBytesRecorder).record(anyInt()); await().until(() -> dumpSession.awaitDone().toCompletableFuture().isDone()); - verify(rangeCPDataItr).close(); + verify(checkpointItr).close(); + verify(checkpointReader).close(); } @Test @@ -198,9 +197,8 @@ public void rateLimit() { when(rangeAccessor.id()).thenReturn(rangeId); when(rangeAccessor.hasCheckpoint(snapshot)).thenReturn(true); - when(rangeAccessor.open(snapshot)).thenReturn(rangeCPReader); - when(rangeCPReader.newDataReader()).thenReturn(rangeCPDataReader); - when(rangeCPDataReader.iterator()).thenReturn(rangeCPDataItr); + when(rangeAccessor.open(snapshot)).thenReturn(checkpointReader); + when(checkpointReader.iterator()).thenReturn(checkpointItr); when(messenger.receive()).thenReturn(incomingMsgs); @@ -213,15 +211,15 @@ public void rateLimit() { ByteString.copyFrom(new byte[200_000]), ByteString.copyFrom(new byte[200_000]) }; - when(rangeCPDataItr.isValid()).thenAnswer(invocation -> index.get() < values.length); - when(rangeCPDataItr.key()).thenAnswer(invocation -> keys[index.get()]); - when(rangeCPDataItr.value()).thenAnswer(invocation -> values[index.get()]); + when(checkpointItr.isValid()).thenAnswer(invocation -> index.get() < values.length); + when(checkpointItr.key()).thenAnswer(invocation -> keys[index.get()]); + when(checkpointItr.value()).thenAnswer(invocation -> values[index.get()]); doAnswer(invocation -> { index.incrementAndGet(); return null; - }).when(rangeCPDataItr).next(); + }).when(checkpointItr).next(); - KVRangeDumpSession dumpSession = new KVRangeDumpSession(peerStoreId, request, rangeAccessor, messenger, + KVRangeDumpSession dumpSession = new KVRangeDumpSession(sessionId, snapshot, rangeId, peerStoreId, rangeAccessor, messenger, Duration.ofMillis(100), 0, new SnapshotBandwidthGovernor(0), dumpBytesRecorder); ArgumentCaptor messageCap = ArgumentCaptor.forClass(KVRangeMessage.class); verify(messenger, 
timeout(100).times(1)).send(messageCap.capture()); @@ -249,16 +247,15 @@ public void resend() { when(rangeAccessor.id()).thenReturn(rangeId); when(rangeAccessor.hasCheckpoint(snapshot)).thenReturn(true); - when(rangeAccessor.open(snapshot)).thenReturn(rangeCPReader); - when(rangeCPReader.newDataReader()).thenReturn(rangeCPDataReader); - when(rangeCPDataReader.iterator()).thenReturn(rangeCPDataItr); + when(rangeAccessor.open(snapshot)).thenReturn(checkpointReader); + when(checkpointReader.iterator()).thenReturn(checkpointItr); when(messenger.receive()).thenReturn(incomingMsgs); - when(rangeCPDataItr.isValid()).thenReturn(true, false); - when(rangeCPDataItr.key()).thenReturn(ByteString.copyFromUtf8("key")); - when(rangeCPDataItr.value()).thenReturn(ByteString.copyFromUtf8("value")); - KVRangeDumpSession dumpSession = new KVRangeDumpSession(peerStoreId, request, rangeAccessor, messenger, + when(checkpointItr.isValid()).thenReturn(true, false); + when(checkpointItr.key()).thenReturn(ByteString.copyFromUtf8("key")); + when(checkpointItr.value()).thenReturn(ByteString.copyFromUtf8("value")); + KVRangeDumpSession dumpSession = new KVRangeDumpSession(sessionId, snapshot, rangeId, peerStoreId, rangeAccessor, messenger, Duration.ofMillis(100), 1024, new SnapshotBandwidthGovernor(0), dumpBytesRecorder); Thread.sleep(60); dumpSession.tick(); @@ -286,22 +283,22 @@ public void idle() { when(rangeAccessor.id()).thenReturn(rangeId); when(rangeAccessor.hasCheckpoint(snapshot)).thenReturn(true); - when(rangeAccessor.open(snapshot)).thenReturn(rangeCPReader); - when(rangeCPReader.newDataReader()).thenReturn(rangeCPDataReader); - when(rangeCPDataReader.iterator()).thenReturn(rangeCPDataItr); + when(rangeAccessor.open(snapshot)).thenReturn(checkpointReader); + when(checkpointReader.iterator()).thenReturn(checkpointItr); when(messenger.receive()).thenReturn(incomingMsgs); - when(rangeCPDataItr.isValid()).thenReturn(true, false); - 
when(rangeCPDataItr.key()).thenReturn(ByteString.copyFromUtf8("key")); - when(rangeCPDataItr.value()).thenReturn(ByteString.copyFromUtf8("value")); - KVRangeDumpSession dumpSession = new KVRangeDumpSession(peerStoreId, request, rangeAccessor, messenger, + when(checkpointItr.isValid()).thenReturn(true, false); + when(checkpointItr.key()).thenReturn(ByteString.copyFromUtf8("key")); + when(checkpointItr.value()).thenReturn(ByteString.copyFromUtf8("value")); + KVRangeDumpSession dumpSession = new KVRangeDumpSession(sessionId, snapshot, rangeId, peerStoreId, rangeAccessor, messenger, Duration.ofMillis(10), 1024, new SnapshotBandwidthGovernor(0), dumpBytesRecorder); Thread.sleep(20); dumpSession.tick(); - verify(messenger, timeout(100).times(1)).send(any()); + verify(messenger, timeout(100).times(2)).send(any()); assertTrue(dumpSession.awaitDone().toCompletableFuture().isDone()); - verify(rangeCPDataItr).close(); + verify(checkpointItr).close(); + verify(checkpointReader).close(); } @Test @@ -322,21 +319,21 @@ public void cancel() { when(rangeAccessor.id()).thenReturn(rangeId); when(rangeAccessor.hasCheckpoint(snapshot)).thenReturn(true); - when(rangeAccessor.open(snapshot)).thenReturn(rangeCPReader); - when(rangeCPReader.newDataReader()).thenReturn(rangeCPDataReader); - when(rangeCPDataReader.iterator()).thenReturn(rangeCPDataItr); + when(rangeAccessor.open(snapshot)).thenReturn(checkpointReader); + when(checkpointReader.iterator()).thenReturn(checkpointItr); when(messenger.receive()).thenReturn(incomingMsgs); - when(rangeCPDataItr.isValid()).thenReturn(true, false); - when(rangeCPDataItr.key()).thenReturn(ByteString.copyFromUtf8("key")); - when(rangeCPDataItr.value()).thenReturn(ByteString.copyFromUtf8("value")); - KVRangeDumpSession dumpSession = new KVRangeDumpSession(peerStoreId, request, rangeAccessor, messenger, + when(checkpointItr.isValid()).thenReturn(true, false); + when(checkpointItr.key()).thenReturn(ByteString.copyFromUtf8("key")); + 
when(checkpointItr.value()).thenReturn(ByteString.copyFromUtf8("value")); + KVRangeDumpSession dumpSession = new KVRangeDumpSession(sessionId, snapshot, rangeId, peerStoreId, rangeAccessor, messenger, Duration.ofMillis(10), 1024, new SnapshotBandwidthGovernor(0), dumpBytesRecorder); assertFalse(dumpSession.awaitDone().toCompletableFuture().isDone()); dumpSession.cancel(); - verify(messenger, timeout(100).times(1)).send(any()); + verify(messenger, timeout(100).times(2)).send(any()); assertTrue(dumpSession.awaitDone().toCompletableFuture().isDone()); - verify(rangeCPDataItr).close(); + verify(checkpointItr).close(); + verify(checkpointReader).close(); } } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeMetadataTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeMetadataTest.java index ee238e107..a3a3a71ce 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeMetadataTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeMetadataTest.java @@ -14,24 +14,23 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store.range; import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertNull; import static org.testng.Assert.fail; +import io.reactivex.rxjava3.core.Maybe; +import java.util.concurrent.TimeUnit; import org.apache.bifromq.basekv.localengine.ICPableKVSpace; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeSnapshot; import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; -import io.reactivex.rxjava3.core.Maybe; -import java.util.concurrent.TimeUnit; import org.testng.annotations.Test; public class KVRangeMetadataTest extends AbstractKVRangeTest { @@ -41,9 +40,9 @@ public void initWithNoData() { ICPableKVSpace keyRange = kvEngine.createIfMissing(KVRangeIdUtil.toString(id)); IKVRange accessor = new KVRange(id, keyRange); assertEquals(accessor.id(), id); - assertEquals(accessor.version(), -1); - assertEquals(accessor.lastAppliedIndex(), -1); - assertEquals(accessor.state().getType(), State.StateType.NoUse); + assertEquals(accessor.currentVer(), -1); + assertEquals(accessor.currentLastAppliedIndex(), -1); + assertEquals(accessor.currentState().getType(), State.StateType.NoUse); } @Test @@ -58,24 +57,26 @@ public void initExistingRange() { .setClusterConfig(initConfig) .build(); ICPableKVSpace keyRange = kvEngine.createIfMissing(KVRangeIdUtil.toString(snapshot.getId())); - IKVRange accessor = new KVRange(snapshot.getId(), keyRange).toReseter(snapshot).done(); + IKVRange accessor = new KVRange(snapshot.getId(), keyRange); + IKVRangeRestoreSession restoreSession = accessor.startRestore(snapshot, (c, b) -> {}); + restoreSession.done(); - assertEquals(accessor.version(), snapshot.getVer()); - assertEquals(accessor.boundary(), snapshot.getBoundary()); - assertEquals(accessor.lastAppliedIndex(), 
snapshot.getLastAppliedIndex()); - assertEquals(accessor.state(), snapshot.getState()); - assertEquals(accessor.clusterConfig(), snapshot.getClusterConfig()); + assertEquals(accessor.currentVer(), snapshot.getVer()); + assertEquals(accessor.currentBoundary(), snapshot.getBoundary()); + assertEquals(accessor.currentLastAppliedIndex(), snapshot.getLastAppliedIndex()); + assertEquals(accessor.currentState(), snapshot.getState()); + assertEquals(accessor.currentClusterConfig(), snapshot.getClusterConfig()); } @Test public void initWithNoDataAndDestroy() { try { KVRangeId rangeId = KVRangeIdUtil.generate(); - ICPableKVSpace keyRange = kvEngine.createIfMissing(KVRangeIdUtil.toString(rangeId)); - IKVRange kvRange = new KVRange(rangeId, keyRange); - Maybe metaMayBe = kvRange.metadata().firstElement(); - keyRange.destroy(); - assertNull(metaMayBe.timeout(5, TimeUnit.SECONDS).blockingGet()); + ICPableKVSpace kvSpace = kvEngine.createIfMissing(KVRangeIdUtil.toString(rangeId)); + IKVRange kvRange = new KVRange(rangeId, kvSpace); + Maybe stateMayBe = kvRange.state().firstElement(); + kvSpace.destroy(); + assertEquals(stateMayBe.timeout(5, TimeUnit.SECONDS).blockingGet().getType(), State.StateType.NoUse); } catch (Throwable e) { fail(); } @@ -95,11 +96,13 @@ public void lastAppliedIndex() { .setBoundary(FULL_BOUNDARY) .build(); ICPableKVSpace keyRange = kvEngine.createIfMissing(KVRangeIdUtil.toString(snapshot.getId())); - IKVRange accessor = new KVRange(snapshot.getId(), keyRange).toReseter(snapshot).done(); + IKVRange accessor = new KVRange(snapshot.getId(), keyRange); + IKVRangeRestoreSession restoreSession = accessor.startRestore(snapshot, (c, b) -> {}); + restoreSession.done(); lastAppliedIndex = 11; accessor.toWriter().lastAppliedIndex(lastAppliedIndex).done(); - assertEquals(accessor.lastAppliedIndex(), lastAppliedIndex); + assertEquals(accessor.currentLastAppliedIndex(), lastAppliedIndex); } } diff --git 
a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeQueryLinearizerTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeQueryLinearizerTest.java index 760ab8f4e..231d1b462 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeQueryLinearizerTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeQueryLinearizerTest.java @@ -19,16 +19,21 @@ package org.apache.bifromq.basekv.store.range; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.MockableTest; import com.google.common.util.concurrent.MoreExecutors; import java.util.concurrent.CompletableFuture; +import java.util.function.Function; import java.util.function.Supplier; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.MockableTest; import org.mockito.Mock; +import org.mockito.stubbing.Answer; import org.testng.annotations.Test; @Slf4j @@ -36,10 +41,16 @@ public class KVRangeQueryLinearizerTest extends MockableTest { @Mock private Supplier> readIndexSupplier; + @Mock + private Function>, CompletableFuture> recordDuration; + @Test public void linearizeAfterInit() { + when(recordDuration.apply(any(Supplier.class))).thenAnswer( + (Answer>) invocation -> + ((Supplier>) invocation.getArgument(0)).get()); KVRangeQueryLinearizer linearizer = - new KVRangeQueryLinearizer(readIndexSupplier, MoreExecutors.directExecutor(), 3); + new KVRangeQueryLinearizer(readIndexSupplier, MoreExecutors.directExecutor(), 3, recordDuration); when(readIndexSupplier.get()) .thenReturn(CompletableFuture.completedFuture(1L), CompletableFuture.completedFuture(2L), @@ -50,12 +61,16 @@ public void 
linearizeAfterInit() { assertTrue(t1.isDone()); assertTrue(t2.isDone()); assertTrue(t3.isDone()); + verify(recordDuration, times(3)).apply(any()); } @Test public void linearize() { + when(recordDuration.apply(any(Supplier.class))).thenAnswer( + (Answer>) invocation -> + ((Supplier>) invocation.getArgument(0)).get()); KVRangeQueryLinearizer linearizer = - new KVRangeQueryLinearizer(readIndexSupplier, MoreExecutors.directExecutor(), 0); + new KVRangeQueryLinearizer(readIndexSupplier, MoreExecutors.directExecutor(), 0, recordDuration); when(readIndexSupplier.get()) .thenReturn(CompletableFuture.completedFuture(1L), CompletableFuture.completedFuture(1L), @@ -75,5 +90,6 @@ public void linearize() { assertTrue(t3.isDone()); assertTrue(linearizer.linearize().toCompletableFuture().isDone()); + verify(recordDuration, times(4)).apply(any()); } } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeQueryRunnerTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeQueryRunnerTest.java index e20a01df2..a9c455ce7 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeQueryRunnerTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeQueryRunnerTest.java @@ -14,12 +14,13 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store.range; import static com.google.common.util.concurrent.MoreExecutors.directExecutor; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -28,15 +29,6 @@ import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; -import org.apache.bifromq.basekv.MockableTest; -import org.apache.bifromq.basekv.proto.KVRangeDescriptor; -import org.apache.bifromq.basekv.proto.State; -import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; -import org.apache.bifromq.basekv.store.api.IKVReader; -import org.apache.bifromq.basekv.store.exception.KVRangeException; -import org.apache.bifromq.basekv.store.proto.ROCoProcInput; -import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; -import org.apache.bifromq.basekv.store.util.VerUtil; import com.google.protobuf.ByteString; import java.util.Collections; import java.util.Optional; @@ -45,6 +37,16 @@ import java.util.concurrent.locks.StampedLock; import java.util.function.Supplier; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.MockableTest; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.proto.State; +import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; +import org.apache.bifromq.basekv.store.api.IKVRangeReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; +import org.apache.bifromq.basekv.store.exception.KVRangeException; +import org.apache.bifromq.basekv.store.proto.ROCoProcInput; +import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; +import org.apache.bifromq.basekv.store.util.VerUtil; import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.testng.annotations.Test; @@ -54,7 +56,7 @@ public class KVRangeQueryRunnerTest extends MockableTest { @Mock private IKVRange accessor; @Mock - private 
IKVReader kvReader; + private IKVRangeRefreshableReader kvReader; @Mock private IKVRangeQueryLinearizer linearizer; @Mock @@ -66,8 +68,8 @@ public class KVRangeQueryRunnerTest extends MockableTest { public void badVersionQuery() { KVRangeQueryRunner runner = new KVRangeQueryRunner(accessor, coProc, directExecutor(), linearizer, Collections.emptyList(), latestStatusSupplier, new StampedLock()); - when(accessor.borrowDataReader()).thenReturn(kvReader); - when(accessor.version()).thenReturn(VerUtil.bump(0L, true)); + when(accessor.newReader()).thenReturn(kvReader); + when(accessor.currentVer()).thenReturn(VerUtil.bump(0L, true)); when(kvReader.get(any(ByteString.class))).thenReturn(Optional.empty()); KVRangeDescriptor latest = KVRangeDescriptor.newBuilder().setVer(10L).build(); when(latestStatusSupplier.get()).thenReturn(latest); @@ -101,12 +103,13 @@ public void internalErrorByPurgedState() { private void internalErrorByWrongState(State.StateType stateType) { KVRangeQueryRunner runner = new KVRangeQueryRunner(accessor, coProc, directExecutor(), linearizer, Collections.emptyList(), latestStatusSupplier, new StampedLock()); - when(accessor.borrowDataReader()).thenReturn(kvReader); - when(accessor.state()).thenReturn(State.newBuilder().setType(stateType).build()); + when(accessor.newReader()).thenReturn(kvReader); + when(accessor.currentState()).thenReturn(State.newBuilder().setType(stateType).build()); + when(kvReader.boundary()).thenReturn(FULL_BOUNDARY); CompletableFuture queryFuture = runner.queryCoProc(0, ROCoProcInput.newBuilder().setRaw(ByteString.copyFromUtf8("key")).build(), false); - verify(accessor).returnDataReader(kvReader); + verify(kvReader).close(); try { queryFuture.join(); fail(); @@ -119,12 +122,13 @@ private void internalErrorByWrongState(State.StateType stateType) { public void get() { KVRangeQueryRunner runner = new KVRangeQueryRunner(accessor, coProc, directExecutor(), linearizer, Collections.emptyList(), latestStatusSupplier, new StampedLock()); 
- when(accessor.borrowDataReader()).thenReturn(kvReader); - when(accessor.version()).thenReturn(0L); - when(accessor.state()).thenReturn(State.newBuilder().setType(State.StateType.Normal).build()); + when(accessor.newReader()).thenReturn(kvReader); + when(accessor.currentVer()).thenReturn(0L); + when(accessor.currentState()).thenReturn(State.newBuilder().setType(State.StateType.Normal).build()); when(kvReader.get(any(ByteString.class))).thenReturn(Optional.empty()); + when(kvReader.boundary()).thenReturn(FULL_BOUNDARY); CompletionStage> queryFuture = runner.get(0, ByteString.copyFromUtf8("key"), false); - verify(accessor).returnDataReader(kvReader); + verify(kvReader).close(); try { Optional result = queryFuture.toCompletableFuture().join(); assertFalse(result.isPresent()); @@ -137,12 +141,13 @@ public void get() { public void exist() { KVRangeQueryRunner runner = new KVRangeQueryRunner(accessor, coProc, directExecutor(), linearizer, Collections.emptyList(), latestStatusSupplier, new StampedLock()); - when(accessor.borrowDataReader()).thenReturn(kvReader); - when(accessor.version()).thenReturn(0L); - when(accessor.state()).thenReturn(State.newBuilder().setType(State.StateType.Normal).build()); + when(accessor.newReader()).thenReturn(kvReader); + when(accessor.currentVer()).thenReturn(0L); + when(accessor.currentState()).thenReturn(State.newBuilder().setType(State.StateType.Normal).build()); when(kvReader.exist(any(ByteString.class))).thenReturn(false); + when(kvReader.boundary()).thenReturn(FULL_BOUNDARY); CompletionStage queryFuture = runner.exist(0, ByteString.copyFromUtf8("key"), false); - verify(accessor).returnDataReader(kvReader); + verify(kvReader).close(); try { assertFalse(queryFuture.toCompletableFuture().join()); } catch (Throwable e) { @@ -156,15 +161,15 @@ public void roCoProc() { Collections.emptyList(), latestStatusSupplier, new StampedLock()); ROCoProcInput key = ROCoProcInput.newBuilder().setRaw(ByteString.copyFromUtf8("key")).build(); 
ROCoProcOutput value = ROCoProcOutput.newBuilder().setRaw(ByteString.copyFromUtf8("value")).build(); - when(accessor.borrowDataReader()).thenReturn(kvReader); - when(accessor.version()).thenReturn(0L); - when(accessor.state()).thenReturn(State.newBuilder().setType(State.StateType.Normal).build()); - when(coProc.query(any(ROCoProcInput.class), any(IKVReader.class))) + when(accessor.newReader()).thenReturn(kvReader); + when(accessor.currentVer()).thenReturn(0L); + when(accessor.currentState()).thenReturn(State.newBuilder().setType(State.StateType.Normal).build()); + when(coProc.query(any(ROCoProcInput.class), any(IKVRangeRefreshableReader.class))) .thenReturn(CompletableFuture.completedFuture(value)); CompletableFuture queryFuture = runner.queryCoProc(0, key, false); - verify(accessor).returnDataReader(kvReader); + verify(kvReader).close(); ArgumentCaptor inputCap = ArgumentCaptor.forClass(ROCoProcInput.class); - ArgumentCaptor kvReaderCap = ArgumentCaptor.forClass(IKVReader.class); + ArgumentCaptor kvReaderCap = ArgumentCaptor.forClass(IKVRangeReader.class); verify(coProc).query(inputCap.capture(), kvReaderCap.capture()); assertEquals(inputCap.getValue(), key); try { @@ -180,16 +185,16 @@ public void linearizedRoCoProc() { Collections.emptyList(), latestStatusSupplier, new StampedLock()); ROCoProcInput key = ROCoProcInput.newBuilder().setRaw(ByteString.copyFromUtf8("key")).build(); ROCoProcOutput value = ROCoProcOutput.newBuilder().setRaw(ByteString.copyFromUtf8("value")).build(); - when(accessor.borrowDataReader()).thenReturn(kvReader); - when(accessor.version()).thenReturn(0L); - when(accessor.state()).thenReturn(State.newBuilder().setType(State.StateType.Normal).build()); - when(coProc.query(any(ROCoProcInput.class), any(IKVReader.class))) + when(accessor.newReader()).thenReturn(kvReader); + when(accessor.currentVer()).thenReturn(0L); + when(accessor.currentState()).thenReturn(State.newBuilder().setType(State.StateType.Normal).build()); + 
when(coProc.query(any(ROCoProcInput.class), any(IKVRangeReader.class))) .thenReturn(CompletableFuture.completedFuture(value)); when(linearizer.linearize()).thenReturn(CompletableFuture.completedFuture(null)); CompletableFuture queryFuture = runner.queryCoProc(0, key, true); - verify(accessor).returnDataReader(kvReader); + verify(kvReader).close(); ArgumentCaptor inputCap = ArgumentCaptor.forClass(ROCoProcInput.class); - ArgumentCaptor kvReaderCap = ArgumentCaptor.forClass(IKVReader.class); + ArgumentCaptor kvReaderCap = ArgumentCaptor.forClass(IKVRangeReader.class); verify(coProc).query(inputCap.capture(), kvReaderCap.capture()); assertEquals(inputCap.getValue(), key); try { @@ -204,12 +209,12 @@ public void close() { KVRangeQueryRunner runner = new KVRangeQueryRunner(accessor, coProc, directExecutor(), linearizer, Collections.emptyList(), latestStatusSupplier, new StampedLock()); ROCoProcInput key = ROCoProcInput.newBuilder().setRaw(ByteString.copyFromUtf8("key")).build(); - when(accessor.borrowDataReader()).thenReturn(kvReader); - when(accessor.version()).thenReturn(0L); - when(accessor.state()).thenReturn(State.newBuilder().setType(State.StateType.Normal).build()); + when(accessor.newReader()).thenReturn(kvReader); + when(accessor.currentVer()).thenReturn(0L); + when(accessor.currentState()).thenReturn(State.newBuilder().setType(State.StateType.Normal).build()); when(linearizer.linearize()).thenReturn(new CompletableFuture<>()); - when(coProc.query(any(ROCoProcInput.class), any(IKVReader.class))).thenReturn(new CompletableFuture<>()); + when(coProc.query(any(ROCoProcInput.class), any(IKVRangeReader.class))).thenReturn(new CompletableFuture<>()); CompletableFuture queryFuture = runner.queryCoProc(0, key, false); CompletableFuture linearizedQueryFuture = runner.queryCoProc(0, key, true); diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeRestorerTest.java 
b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeRestorerTest.java index 199d595f4..f6dbf4815 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeRestorerTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeRestorerTest.java @@ -19,10 +19,12 @@ package org.apache.bifromq.basekv.store.range; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.timeout; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -31,25 +33,27 @@ import static org.testng.Assert.assertTrue; import static org.testng.AssertJUnit.assertSame; -import com.google.protobuf.ByteString; +import com.google.common.util.concurrent.MoreExecutors; import io.reactivex.rxjava3.subjects.PublishSubject; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import org.apache.bifromq.basekv.proto.KVPair; +import java.util.concurrent.TimeUnit; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeMessage; import org.apache.bifromq.basekv.proto.KVRangeSnapshot; +import org.apache.bifromq.basekv.proto.SaveSnapshotDataReply; import org.apache.bifromq.basekv.proto.SaveSnapshotDataRequest; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.mockito.ArgumentCaptor; +import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class KVRangeRestorerTest { private KVRangeId rangeId; private IKVRange range; - private IKVReseter reseter; private IKVRangeMessenger 
messenger; private PublishSubject messageSubject; private IKVRangeMetricManager metricManager; @@ -69,23 +73,32 @@ public void setUp() { snapshot = KVRangeSnapshot.newBuilder().setId(rangeId).build(); } + @AfterMethod + public void tearDown() { + if (executor instanceof ExecutorService) { + MoreExecutors.shutdownAndAwaitTermination((ExecutorService) executor, 5, TimeUnit.SECONDS); + } + } + @Test public void awaitDone() { - IKVReseter reseter = mock(IKVReseter.class); - when(range.toReseter(eq(snapshot))).thenReturn(reseter); + IKVRangeRestoreSession restoreSession = mock(IKVRangeRestoreSession.class); + when(range.startRestore(eq(snapshot), any())).thenReturn(restoreSession); KVRangeRestorer restorer = new KVRangeRestorer(snapshot, range, messenger, metricManager, executor, 10); assertTrue(restorer.awaitDone().isDone()); // Manually complete the future - restorer.restoreFrom("leader", snapshot); + CompletableFuture restoreFuture = restorer.restoreFrom("leader", snapshot); assertFalse(restorer.awaitDone().isDone()); + // Ensure no active subscription remains after test + restoreFuture.cancel(true); } @Test public void restoreFrom() { - IKVReseter reseter = mock(IKVReseter.class); - when(range.toReseter(snapshot)).thenReturn(reseter); + IKVRangeRestoreSession restoreSession = mock(IKVRangeRestoreSession.class); + when(range.startRestore(eq(snapshot), any())).thenReturn(restoreSession); KVRangeRestorer restorer = new KVRangeRestorer(snapshot, range, messenger, metricManager, executor, 10); CompletableFuture restoreFuture = restorer.restoreFrom("leader", snapshot); @@ -97,6 +110,7 @@ public void restoreFrom() { // Simulate receiving snapshot data messageSubject.onNext(KVRangeMessage.newBuilder() + .setHostStoreId("leader") .setSaveSnapshotDataRequest(SaveSnapshotDataRequest.newBuilder() .setSessionId(message.getSnapshotSyncRequest().getSessionId()) .setFlag(SaveSnapshotDataRequest.Flag.End) @@ -106,13 +120,13 @@ public void restoreFrom() { // Wait for the future to 
complete restoreFuture.join(); // Verify the reseter's put and done methods were called - verify(reseter, times(1)).done(); + verify(restoreSession, times(1)).done(); } @Test public void restoreFromWithError() { - IKVReseter reseter = mock(IKVReseter.class); - when(range.toReseter(snapshot)).thenReturn(reseter); + IKVRangeRestoreSession restoreSession = mock(IKVRangeRestoreSession.class); + when(range.startRestore(eq(snapshot), any())).thenReturn(restoreSession); KVRangeRestorer restorer = new KVRangeRestorer(snapshot, range, messenger, metricManager, executor, 10); CompletableFuture restoreFuture = restorer.restoreFrom("leader", snapshot); @@ -125,6 +139,7 @@ public void restoreFromWithError() { // Simulate receiving snapshot data messageSubject.onNext(KVRangeMessage.newBuilder() + .setHostStoreId("leader") .setSaveSnapshotDataRequest(SaveSnapshotDataRequest.newBuilder() .setSessionId(sessionId) .setFlag(SaveSnapshotDataRequest.Flag.Error) @@ -135,13 +150,13 @@ public void restoreFromWithError() { assertThrows(restoreFuture::join); // Verify the reseter's put and done methods were called - verify(reseter, times(1)).abort(); + verify(restoreSession, times(1)).abort(); } @Test public void restoreFromTimeout() { - IKVReseter reseter = mock(IKVReseter.class); - when(range.toReseter(eq(snapshot))).thenReturn(reseter); + IKVRangeRestoreSession restoreSession = mock(IKVRangeRestoreSession.class); + when(range.startRestore(eq(snapshot), any())).thenReturn(restoreSession); KVRangeRestorer restorer = new KVRangeRestorer(snapshot, range, messenger, metricManager, executor, 1); CompletableFuture restoreFuture = restorer.restoreFrom("leader", snapshot); @@ -150,33 +165,44 @@ public void restoreFromTimeout() { assertThrows(restoreFuture::join); // Verify the reseter's put and done methods were called - verify(reseter, times(1)).abort(); + verify(restoreSession, times(1)).abort(); } @Test public void cancelPreviousSession() { - IKVReseter reseter = mock(IKVReseter.class); - 
when(range.toReseter(eq(snapshot))).thenReturn(reseter); + IKVRangeRestoreSession firstRS = mock(IKVRangeRestoreSession.class); + IKVRangeRestoreSession secondRS = mock(IKVRangeRestoreSession.class); + when(range.startRestore(eq(snapshot), any())).thenReturn(firstRS); KVRangeRestorer restorer = new KVRangeRestorer(snapshot, range, messenger, metricManager, executor, 10); // Start the first restore session CompletableFuture firstRestore = restorer.restoreFrom("leader", snapshot); + // Ensure receiver subscribed before starting next session + assertTrue(messageSubject.hasObservers()); // ensure receiver subscribed + // Start the second restore session, which should cancel the first KVRangeSnapshot newSnapshot = KVRangeSnapshot.newBuilder().setId(snapshot.getId()).setVer(1).build(); - when(range.toReseter(eq(newSnapshot))).thenReturn(reseter); + when(range.startRestore(eq(newSnapshot), any())).thenReturn(secondRS); CompletableFuture secondRestore = restorer.restoreFrom("leader", newSnapshot); - verify(reseter, times(1)).abort(); + verify(firstRS, atLeast(1)).abort(); + verify(secondRS, times(0)).abort(); + // Allow async cancellation hook to send NoSessionFound + verify(messenger, timeout(500)).send(argThat(m -> + m.hasSaveSnapshotDataReply() && + m.getSaveSnapshotDataReply().getResult() == SaveSnapshotDataReply.Result.NoSessionFound)); assertTrue(firstRestore.isCancelled()); assertFalse(secondRestore.isDone()); + + secondRestore.cancel(true); } @Test public void reuseRestoreSessionForSameSnapshot() { - IKVReseter reseter = mock(IKVReseter.class); - when(range.toReseter(snapshot)).thenReturn(reseter); + IKVRangeRestoreSession restoreSession = mock(IKVRangeRestoreSession.class); + when(range.startRestore(eq(snapshot), any())).thenReturn(restoreSession); KVRangeRestorer restorer = new KVRangeRestorer(snapshot, range, messenger, metricManager, executor, 10); @@ -184,7 +210,7 @@ public void reuseRestoreSessionForSameSnapshot() { CompletableFuture secondRestore = 
restorer.restoreFrom("leader", snapshot); assertSame(firstRestore, secondRestore); - verify(range, times(1)).toReseter(snapshot); + verify(range, times(1)).startRestore(eq(snapshot), any()); verify(messenger, times(1)).send(argThat(KVRangeMessage::hasSnapshotSyncRequest)); ArgumentCaptor messageCaptor = ArgumentCaptor.forClass(KVRangeMessage.class); @@ -192,6 +218,7 @@ public void reuseRestoreSessionForSameSnapshot() { KVRangeMessage message = messageCaptor.getValue(); messageSubject.onNext(KVRangeMessage.newBuilder() + .setHostStoreId("leader") .setSaveSnapshotDataRequest(SaveSnapshotDataRequest.newBuilder() .setSessionId(message.getSnapshotSyncRequest().getSessionId()) .setFlag(SaveSnapshotDataRequest.Flag.End) @@ -200,14 +227,14 @@ public void reuseRestoreSessionForSameSnapshot() { firstRestore.join(); assertTrue(secondRestore.isDone()); - verify(reseter, times(1)).done(); + verify(restoreSession, times(1)).done(); } @Test public void startNewSessionWhenLeaderChanges() { - IKVReseter firstReseter = mock(IKVReseter.class); - IKVReseter secondReseter = mock(IKVReseter.class); - when(range.toReseter(snapshot)).thenReturn(firstReseter, secondReseter); + IKVRangeRestoreSession firstRS = mock(IKVRangeRestoreSession.class); + IKVRangeRestoreSession secondRS = mock(IKVRangeRestoreSession.class); + when(range.startRestore(eq(snapshot), any())).thenReturn(firstRS, secondRS); KVRangeRestorer restorer = new KVRangeRestorer(snapshot, range, messenger, metricManager, executor, 10); @@ -216,103 +243,54 @@ public void startNewSessionWhenLeaderChanges() { assertTrue(firstRestore.isCancelled()); assertFalse(secondRestore.isDone()); - verify(range, times(2)).toReseter(snapshot); + verify(range, times(2)).startRestore(eq(snapshot), any()); verify(messenger, times(2)).send(argThat(KVRangeMessage::hasSnapshotSyncRequest)); - verify(firstReseter, times(1)).abort(); - } - - @Test - public void autoFlushWhenEntryLimitReached() { - IKVReseter reseter = mock(IKVReseter.class); - 
when(range.toReseter(eq(snapshot))).thenReturn(reseter); - - KVRangeRestorer restorer = new KVRangeRestorer(snapshot, range, messenger, metricManager, executor, 10); - CompletableFuture restoreFuture = restorer.restoreFrom("leader", snapshot); - - ArgumentCaptor messageCaptor = ArgumentCaptor.forClass(KVRangeMessage.class); - verify(messenger).send(messageCaptor.capture()); - String sessionId = messageCaptor.getValue().getSnapshotSyncRequest().getSessionId(); - - SaveSnapshotDataRequest.Builder builder = SaveSnapshotDataRequest.newBuilder() - .setSessionId(sessionId) - .setFlag(SaveSnapshotDataRequest.Flag.End); - for (int i = 0; i < 1_025; i++) { - builder.addKv(KVPair.newBuilder() - .setKey(ByteString.copyFromUtf8("k" + i)) - .setValue(ByteString.copyFromUtf8("v" + i)) - .build()); - } - - messageSubject.onNext(KVRangeMessage.newBuilder() - .setSaveSnapshotDataRequest(builder.build()) - .build()); - - restoreFuture.join(); - - verify(reseter, atLeast(2)).flush(); - verify(reseter, times(1)).done(); + verify(firstRS, atLeast(1)).abort(); + // Cleanup: cancel the second pending restore to avoid executor use after shutdown + secondRestore.cancel(true); // ensure no lingering scheduling } @Test - public void flushRemainingDataOnEnd() { - IKVReseter reseter = mock(IKVReseter.class); - when(range.toReseter(eq(snapshot))).thenReturn(reseter); + public void reuseAfterDoneStartsNew() { + IKVRangeRestoreSession firstRS = mock(IKVRangeRestoreSession.class); + IKVRangeRestoreSession secondRS = mock(IKVRangeRestoreSession.class); + when(range.startRestore(eq(snapshot), any())).thenReturn(firstRS, secondRS); KVRangeRestorer restorer = new KVRangeRestorer(snapshot, range, messenger, metricManager, executor, 10); - CompletableFuture restoreFuture = restorer.restoreFrom("leader", snapshot); - - ArgumentCaptor messageCaptor = ArgumentCaptor.forClass(KVRangeMessage.class); - verify(messenger).send(messageCaptor.capture()); - String sessionId = 
messageCaptor.getValue().getSnapshotSyncRequest().getSessionId(); - KVPair kv1 = KVPair.newBuilder() - .setKey(ByteString.copyFromUtf8("k1")) - .setValue(ByteString.copyFromUtf8("v1")) - .build(); + // Start first restore + CompletableFuture firstRestore = restorer.restoreFrom("leader", snapshot); + ArgumentCaptor captor = ArgumentCaptor.forClass(KVRangeMessage.class); + verify(messenger).send(captor.capture()); + String sessionId1 = captor.getValue().getSnapshotSyncRequest().getSessionId(); + // Complete first restore messageSubject.onNext(KVRangeMessage.newBuilder() + .setHostStoreId("leader") .setSaveSnapshotDataRequest(SaveSnapshotDataRequest.newBuilder() - .setSessionId(sessionId) + .setSessionId(sessionId1) .setFlag(SaveSnapshotDataRequest.Flag.End) - .addKv(kv1) .build()) .build()); + firstRestore.join(); - restoreFuture.join(); - - verify(reseter, times(1)).flush(); - verify(reseter, times(1)).done(); - } - - @Test - public void adaptiveFlushWithUnlimitedBudget() { - IKVReseter reseter = mock(IKVReseter.class); - when(range.toReseter(eq(snapshot))).thenReturn(reseter); - - KVRangeRestorer restorer = new KVRangeRestorer(snapshot, range, messenger, metricManager, executor, 10); - CompletableFuture restoreFuture = restorer.restoreFrom("leader", snapshot); - - ArgumentCaptor messageCaptor = ArgumentCaptor.forClass(KVRangeMessage.class); - verify(messenger).send(messageCaptor.capture()); - String sessionId = messageCaptor.getValue().getSnapshotSyncRequest().getSessionId(); - - SaveSnapshotDataRequest.Builder builder = SaveSnapshotDataRequest.newBuilder() - .setSessionId(sessionId) - .setFlag(SaveSnapshotDataRequest.Flag.End); - for (int i = 0; i < 2048; i++) { - builder.addKv(KVPair.newBuilder() - .setKey(ByteString.copyFromUtf8("k" + i)) - .setValue(ByteString.copyFromUtf8("v" + i)) - .build()); - } - - messageSubject.onNext(KVRangeMessage.newBuilder() - .setSaveSnapshotDataRequest(builder.build()) - .build()); - - restoreFuture.join(); + // Start second 
restore with same leader and snapshot; should start a new session + CompletableFuture secondRestore = restorer.restoreFrom("leader", snapshot); - verify(reseter, atLeast(2)).flush(); - verify(reseter, times(1)).done(); + // Only count SnapshotSyncRequest sends; receiver also sends replies + verify(messenger, times(2)).send(argThat(KVRangeMessage::hasSnapshotSyncRequest)); + // Capture all sends, then filter out SnapshotSyncRequest to compare session ids + verify(messenger, atLeast(2)).send(captor.capture()); + String sessionId2 = captor.getAllValues().stream() + .filter(KVRangeMessage::hasSnapshotSyncRequest) + .map(m -> m.getSnapshotSyncRequest().getSessionId()) + .reduce((first, second) -> second) + .orElse(""); + + // Verify new startRestore invoked and session id differs + verify(range, times(2)).startRestore(eq(snapshot), any()); + assertFalse(sessionId1.equals(sessionId2)); + + secondRestore.cancel(true); } -} \ No newline at end of file +} diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeStatsCollectorTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeStatsCollectorTest.java index 2d49b64cb..d54e8018c 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeStatsCollectorTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeStatsCollectorTest.java @@ -14,23 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.store.range; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; -import org.apache.bifromq.basekv.MockableTest; -import org.apache.bifromq.basekv.store.api.IKVRangeReader; -import org.apache.bifromq.basekv.store.api.IKVReader; -import org.apache.bifromq.basekv.store.wal.IKVRangeWAL; import com.google.common.util.concurrent.MoreExecutors; import io.reactivex.rxjava3.observers.TestObserver; import java.time.Duration; import java.util.Map; +import org.apache.bifromq.basekv.MockableTest; +import org.apache.bifromq.basekv.store.wal.IKVRangeWAL; import org.mockito.Mock; import org.testng.annotations.Test; @@ -38,23 +35,19 @@ public class KVRangeStatsCollectorTest extends MockableTest { @Mock private IKVRangeWAL rangeWAL; @Mock - private IKVRangeReader rangeReader; - @Mock - private IKVReader kvReader; + private IKVRange range; @Test public void testScrap() { - when(rangeReader.newDataReader()).thenReturn(kvReader); - when(kvReader.boundary()).thenReturn(FULL_BOUNDARY); - when(kvReader.size(FULL_BOUNDARY)).thenReturn(0L); + when(range.size()).thenReturn(0L); when(rangeWAL.logDataSize()).thenReturn(0L); - KVRangeStatsCollector statsCollector = new KVRangeStatsCollector(rangeReader, rangeWAL, + KVRangeStatsCollector statsCollector = new KVRangeStatsCollector(range, rangeWAL, Duration.ofSeconds(1), MoreExecutors.directExecutor()); TestObserver> statsObserver = TestObserver.create(); statsCollector.collect().subscribe(statsObserver); statsObserver.awaitCount(1); Map stats = statsObserver.values().get(0); - assertEquals(0.0, stats.get("dataSize")); - assertEquals(0.0, stats.get("walSize")); + assertEquals(stats.get("dataSize"), 0.0); + assertEquals(stats.get("walSize"), 0.0); } } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeTest.java 
b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeTest.java index 1fd373f74..83555c514 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVRangeTest.java @@ -20,17 +20,11 @@ package org.apache.bifromq.basekv.store.range; import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; -import static org.awaitility.Awaitility.await; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertNotSame; -import static org.testng.Assert.assertSame; import static org.testng.Assert.assertTrue; import com.google.protobuf.ByteString; -import java.time.Duration; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; import org.apache.bifromq.basekv.localengine.ICPableKVSpace; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeId; @@ -39,7 +33,7 @@ import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.store.api.IKVIterator; import org.apache.bifromq.basekv.store.api.IKVRangeReader; -import org.apache.bifromq.basekv.store.api.IKVReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.basekv.store.api.IKVWriter; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.testng.annotations.Test; @@ -52,10 +46,10 @@ public void init() { // no snapshot specified IKVRange accessor = new KVRange(id, keyRange); assertEquals(accessor.id(), id); - assertEquals(accessor.version(), -1); - assertEquals(accessor.lastAppliedIndex(), -1); - assertEquals(accessor.state().getType(), State.StateType.NoUse); - assertEquals(accessor.clusterConfig(), ClusterConfig.getDefaultInstance()); + assertEquals(accessor.currentVer(), -1); + 
assertEquals(accessor.currentLastAppliedIndex(), -1); + assertEquals(accessor.currentState().getType(), State.StateType.NoUse); + assertEquals(accessor.currentClusterConfig(), ClusterConfig.getDefaultInstance()); } @Test @@ -78,12 +72,14 @@ public void initWithSnapshot() { writer.put(key, value); rangeWriter.done(); - assertEquals(accessor.newDataReader().get(key).get(), value); - assertEquals(accessor.version(), snapshot.getVer()); - assertEquals(accessor.boundary(), snapshot.getBoundary()); - assertEquals(accessor.lastAppliedIndex(), snapshot.getLastAppliedIndex()); - assertEquals(accessor.state(), snapshot.getState()); - assertEquals(accessor.clusterConfig(), snapshot.getClusterConfig()); + try (IKVRangeRefreshableReader reader = accessor.newReader()) { + assertEquals(reader.get(key).get(), value); + } + assertEquals(accessor.currentVer(), snapshot.getVer()); + assertEquals(accessor.currentBoundary(), snapshot.getBoundary()); + assertEquals(accessor.currentLastAppliedIndex(), snapshot.getLastAppliedIndex()); + assertEquals(accessor.currentState(), snapshot.getState()); + assertEquals(accessor.currentClusterConfig(), snapshot.getClusterConfig()); } @Test @@ -99,14 +95,12 @@ public void metadata() { .build(); ICPableKVSpace keyRange = kvEngine.createIfMissing(KVRangeIdUtil.toString(snapshot.getId())); IKVRange accessor = new KVRange(snapshot.getId(), keyRange, snapshot); - IKVRange.KVRangeMeta metadata = accessor.metadata().blockingFirst(); - assertEquals(metadata.ver(), snapshot.getVer()); - assertEquals(metadata.boundary(), snapshot.getBoundary()); - assertEquals(metadata.state(), snapshot.getState()); - assertEquals(metadata.clusterConfig(), snapshot.getClusterConfig()); - accessor.toWriter().resetVer(2).done(); - metadata = accessor.metadata().blockingFirst(); - assertEquals(metadata.ver(), 2); + assertEquals(accessor.currentVer(), snapshot.getVer()); + assertEquals(accessor.currentBoundary(), snapshot.getBoundary()); + assertEquals(accessor.currentState(), 
snapshot.getState()); + assertEquals(accessor.currentClusterConfig(), snapshot.getClusterConfig()); + accessor.toWriter().ver(2).done(); + assertEquals(accessor.currentVer(), 2); } @Test @@ -134,13 +128,13 @@ public void checkpoint() { assertEquals(snap.getBoundary(), snapshot.getBoundary()); assertEquals(snap.getClusterConfig(), snapshot.getClusterConfig()); - IKVRangeReader rangeCP = accessor.open(snap); - assertEquals(snap.getId(), rangeCP.id()); - assertEquals(snap.getVer(), rangeCP.version()); - assertEquals(snap.getLastAppliedIndex(), rangeCP.lastAppliedIndex()); - assertEquals(snap.getState(), rangeCP.state()); - assertEquals(snap.getBoundary(), rangeCP.boundary()); - assertEquals(snap.getClusterConfig(), rangeCP.clusterConfig()); + try (IKVRangeReader rangeCP = accessor.open(snap)) { + assertEquals(snap.getVer(), rangeCP.version()); + assertEquals(snap.getLastAppliedIndex(), rangeCP.lastAppliedIndex()); + assertEquals(snap.getState(), rangeCP.state()); + assertEquals(snap.getBoundary(), rangeCP.boundary()); + assertEquals(snap.getClusterConfig(), rangeCP.clusterConfig()); + } } @Test @@ -162,18 +156,20 @@ public void openCheckpoint() { writer.kvWriter().put(key, val); writer.done(); - IKVIterator itr = accessor.open(snapshot).newDataReader().iterator(); - itr.seekToFirst(); - assertFalse(itr.isValid()); + try (IKVRangeReader reader = accessor.open(snapshot); IKVIterator itr = reader.iterator()) { + itr.seekToFirst(); + assertFalse(itr.isValid()); + } snapshot = accessor.checkpoint(); - itr = accessor.open(snapshot).newDataReader().iterator(); - itr.seekToFirst(); - assertTrue(itr.isValid()); - assertEquals(itr.key(), key); - assertEquals(itr.value(), val); - itr.next(); - assertFalse(itr.isValid()); + try (IKVRangeReader reader = accessor.open(snapshot); IKVIterator itr = reader.iterator()) { + itr.seekToFirst(); + assertTrue(itr.isValid()); + assertEquals(itr.key(), key); + assertEquals(itr.value(), val); + itr.next(); + assertFalse(itr.isValid()); + } } 
@Test @@ -192,119 +188,24 @@ public void readWrite() { ICPableKVSpace keyRange = kvEngine.createIfMissing(KVRangeIdUtil.toString(snapshot.getId())); IKVRange accessor = new KVRange(snapshot.getId(), keyRange, snapshot); IKVRangeWriter rangeWriter = accessor.toWriter(); - IKVReader rangeReader = accessor.newDataReader(); - IKVIterator kvItr = rangeReader.iterator(); - ByteString key = ByteString.copyFromUtf8("aKey"); - ByteString val = ByteString.copyFromUtf8("Value"); - rangeWriter.kvWriter().put(key, val); - rangeWriter.done(); - assertTrue(rangeReader.exist(key)); - kvItr.seek(key); - assertFalse(kvItr.isValid()); - rangeReader.refresh(); - kvItr.seek(key); - assertTrue(kvItr.isValid()); - assertEquals(kvItr.key(), key); - assertEquals(kvItr.value(), val); - - // make a range change - Boundary newBoundary = boundary.toBuilder().setStartKey(ByteString.copyFromUtf8("b")).build(); - accessor.toWriter().resetVer(1).boundary(newBoundary).done(); - assertEquals(accessor.version(), 1); - assertEquals(accessor.boundary(), newBoundary); - assertEquals(rangeReader.boundary(), newBoundary); - } - - @Test - public void borrowReader() { - Boundary boundary = Boundary.newBuilder() - .setStartKey(ByteString.copyFromUtf8("a")) - .setEndKey(ByteString.copyFromUtf8("c")) - .build(); - KVRangeSnapshot snapshot = KVRangeSnapshot.newBuilder() - .setId(KVRangeIdUtil.generate()) - .setVer(0) - .setLastAppliedIndex(0) - .setState(State.newBuilder().setType(State.StateType.Normal).build()) - .setBoundary(boundary) - .build(); - ICPableKVSpace keyRange = kvEngine.createIfMissing(KVRangeIdUtil.toString(snapshot.getId())); - IKVRange accessor = new KVRange(snapshot.getId(), keyRange, snapshot); - IKVRangeWriter rangeWriter = accessor.toWriter(); - IKVReader rangeReader1 = accessor.borrowDataReader(); - IKVReader rangeReader2 = accessor.borrowDataReader(); - assertNotSame(rangeReader1, rangeReader2); ByteString key = ByteString.copyFromUtf8("aKey"); ByteString val = 
ByteString.copyFromUtf8("Value"); - - assertFalse(rangeReader1.exist(key)); - assertFalse(rangeReader2.exist(key)); - rangeWriter.kvWriter().put(key, val); rangeWriter.done(); - - assertTrue(rangeReader1.exist(key)); - assertTrue(rangeReader2.exist(key)); - - accessor.returnDataReader(rangeReader2); - assertSame(rangeReader2, accessor.borrowDataReader()); - assertTrue(rangeReader2.exist(key)); - } - - @Test - public void borrowReaderConcurrently() { - Boundary range = Boundary.newBuilder() - .setStartKey(ByteString.copyFromUtf8("a")) - .setEndKey(ByteString.copyFromUtf8("c")) - .build(); - KVRangeSnapshot snapshot = KVRangeSnapshot.newBuilder() - .setId(KVRangeIdUtil.generate()) - .setVer(0) - .setLastAppliedIndex(0) - .setState(State.newBuilder().setType(State.StateType.Normal).build()) - .setBoundary(range) - .build(); - ICPableKVSpace keyRange = kvEngine.createIfMissing(KVRangeIdUtil.toString(snapshot.getId())); - IKVRange accessor = new KVRange(snapshot.getId(), keyRange, snapshot); - IKVReader rangeReader1 = accessor.borrowDataReader(); - IKVReader rangeReader2 = accessor.borrowDataReader(); - accessor.returnDataReader(rangeReader1); - accessor.returnDataReader(rangeReader2); - AtomicReference t1Borrowed = new AtomicReference<>(); - AtomicReference t2Borrowed = new AtomicReference<>(); - AtomicBoolean stop = new AtomicBoolean(false); - Thread t1 = new Thread(() -> { - if (stop.get()) { - return; - } - t1Borrowed.set(accessor.borrowDataReader()); - IKVReader t1Reader = t1Borrowed.getAndSet(null); - accessor.returnDataReader(t1Reader); - }); - Thread t2 = new Thread(() -> { - if (stop.get()) { - return; - } - t2Borrowed.set(accessor.borrowDataReader()); - IKVReader t2Reader = t2Borrowed.getAndSet(null); - accessor.returnDataReader(t2Reader); - }); - t1.start(); - t2.start(); - AtomicBoolean success = new AtomicBoolean(true); - try { - await().atMost(Duration.ofSeconds(5)).until(() -> { - IKVReader t1Reader = t1Borrowed.get(); - IKVReader t2Reader = 
t2Borrowed.get(); - return t2Reader != null && t1Reader == t2Reader; // this should not be true - }); - success.set(false); - } catch (Throwable e) { - - } finally { - stop.set(true); + try (IKVRangeRefreshableReader rangeReader = accessor.newReader(); IKVIterator kvItr = rangeReader.iterator()) { + assertTrue(rangeReader.exist(key)); + kvItr.seek(key); + assertTrue(kvItr.isValid()); + assertEquals(kvItr.key(), key); + assertEquals(kvItr.value(), val); + + // make a range change + Boundary newBoundary = boundary.toBuilder().setStartKey(ByteString.copyFromUtf8("b")).build(); + accessor.toWriter().ver(1).boundary(newBoundary).done(); + assertEquals(accessor.currentVer(), 1); + assertEquals(accessor.currentBoundary(), newBoundary); + assertEquals(rangeReader.boundary(), newBoundary); } - assertTrue(success.get()); } @Test @@ -326,9 +227,10 @@ public void resetFromCheckpoint() { rangeWriter.kvWriter().put(key, val); rangeWriter.done(); - accessor.toReseter(snapshot).done(); - IKVReader rangeReader = accessor.newDataReader(); - assertFalse(rangeReader.exist(key)); + accessor.startRestore(snapshot, (c, b) -> {}).done(); + try (IKVRangeRefreshableReader rangeReader = accessor.newReader()) { + assertFalse(rangeReader.exist(key)); + } } @Test @@ -353,71 +255,9 @@ public void destroy() { keyRange = kvEngine.createIfMissing(KVRangeIdUtil.toString(snapshot.getId())); accessor = new KVRange(snapshot.getId(), keyRange, snapshot); - IKVReader rangeReader = accessor.newDataReader(); - assertEquals(accessor.version(), 0); - assertFalse(accessor.newDataReader().exist(key)); - } - - @Test - public void resetFlushSegments() { - KVRangeSnapshot snapshot = KVRangeSnapshot.newBuilder() - .setId(KVRangeIdUtil.generate()) - .setVer(3) - .setLastAppliedIndex(5) - .setState(State.newBuilder().setType(State.StateType.Normal).build()) - .setBoundary(FULL_BOUNDARY) - .setClusterConfig(ClusterConfig.newBuilder().addVoters("A").build()) - .build(); - ICPableKVSpace keyRange = 
kvEngine.createIfMissing(KVRangeIdUtil.toString(snapshot.getId())); - KVRange range = new KVRange(snapshot.getId(), keyRange); - IKVReseter reseter = range.toReseter(snapshot); - ByteString key1 = ByteString.copyFromUtf8("key1"); - ByteString val1 = ByteString.copyFromUtf8("value1"); - ByteString key2 = ByteString.copyFromUtf8("key2"); - ByteString val2 = ByteString.copyFromUtf8("value2"); - reseter.put(key1, val1); - reseter.flush(); - reseter.put(key2, val2); - reseter.done(); - - IKVReader reader = range.newDataReader(); - assertEquals(reader.get(key1).get(), val1); - assertEquals(reader.get(key2).get(), val2); - assertEquals(range.version(), snapshot.getVer()); - assertEquals(range.boundary(), snapshot.getBoundary()); - assertEquals(range.lastAppliedIndex(), snapshot.getLastAppliedIndex()); - assertEquals(range.state(), snapshot.getState()); - assertEquals(range.clusterConfig(), snapshot.getClusterConfig()); - } - - @Test - public void resetWithoutFlush() { - KVRangeSnapshot snapshot = KVRangeSnapshot.newBuilder() - .setId(KVRangeIdUtil.generate()) - .setVer(4) - .setLastAppliedIndex(6) - .setState(State.newBuilder().setType(State.StateType.Normal).build()) - .setBoundary(FULL_BOUNDARY) - .setClusterConfig(ClusterConfig.newBuilder().addVoters("B").build()) - .build(); - ICPableKVSpace keyRange = kvEngine.createIfMissing(KVRangeIdUtil.toString(snapshot.getId())); - KVRange range = new KVRange(snapshot.getId(), keyRange); - IKVReseter reseter = range.toReseter(snapshot); - ByteString key1 = ByteString.copyFromUtf8("segKey1"); - ByteString val1 = ByteString.copyFromUtf8("segVal1"); - ByteString key2 = ByteString.copyFromUtf8("segKey2"); - ByteString val2 = ByteString.copyFromUtf8("segVal2"); - reseter.put(key1, val1); - reseter.put(key2, val2); - reseter.done(); - - IKVReader reader = range.newDataReader(); - assertEquals(reader.get(key1).get(), val1); - assertEquals(reader.get(key2).get(), val2); - assertEquals(range.version(), snapshot.getVer()); - 
assertEquals(range.boundary(), snapshot.getBoundary()); - assertEquals(range.lastAppliedIndex(), snapshot.getLastAppliedIndex()); - assertEquals(range.state(), snapshot.getState()); - assertEquals(range.clusterConfig(), snapshot.getClusterConfig()); + try (IKVRangeRefreshableReader rangeReader = accessor.newReader()) { + assertEquals(accessor.currentVer(), 0); + assertFalse(rangeReader.exist(key)); + } } } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVReaderTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVReaderTest.java deleted file mode 100644 index 21ae695b7..000000000 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/KVReaderTest.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.basekv.store.range; - -import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; -import static org.testng.Assert.assertFalse; -import static org.testng.Assert.assertTrue; - -import org.apache.bifromq.basekv.MockableTest; -import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; -import org.apache.bifromq.basekv.localengine.IKVSpaceReader; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; -import org.apache.bifromq.basekv.store.api.IKVIterator; -import org.apache.bifromq.basekv.store.api.IKVRangeReader; -import org.apache.bifromq.basekv.store.api.IKVReader; -import com.google.protobuf.ByteString; -import java.lang.reflect.Method; -import java.util.Optional; -import org.mockito.Mock; -import org.testng.annotations.Test; - -public class KVReaderTest extends MockableTest { - @Mock - private IKVRangeReader kvRangeReader; - @Mock - private IKVSpaceReader keyRangeReader; - @Mock - private IKVSpaceIterator keyRangeIterator; - @Mock - private IKVReader dataReader; - - @Override - protected void doSetup(Method method) { - when(kvRangeReader.newDataReader()).thenReturn(dataReader); - when(keyRangeReader.newIterator()).thenReturn(keyRangeIterator); - } - - @Test - public void read() { - IKVCloseableReader reader = new KVReader(keyRangeReader, kvRangeReader); - // range - when(kvRangeReader.boundary()).thenReturn(FULL_BOUNDARY); - reader.boundary(); - - // size - Boundary range = Boundary.newBuilder() - .setStartKey(ByteString.copyFromUtf8("a")) - .setEndKey(ByteString.copyFromUtf8("z")) - .build(); - reader.size(range); - verify(kvRangeReader).size(range); - - // exist - when(dataReader.exist(any())).thenReturn(false); - ByteString existKey1 = ByteString.copyFromUtf8("existKey1"); - 
assertFalse(reader.exist(existKey1)); - - when(keyRangeReader.exist(any())).thenReturn(true); - ByteString existKey2 = ByteString.copyFromUtf8("existKey2"); - assertTrue(reader.exist(existKey2)); - - // get - when(keyRangeReader.get(any())).thenReturn(Optional.empty()); - ByteString getKey1 = ByteString.copyFromUtf8("getKey1"); - assertFalse(reader.get(getKey1).isPresent()); - - when(keyRangeReader.get(any())).thenReturn(Optional.of(ByteString.copyFromUtf8("value"))); - ByteString getKey2 = ByteString.copyFromUtf8("getKey2"); - assertTrue(reader.get(getKey2).isPresent()); - } - - @Test - public void close() { - IKVCloseableReader reader = new KVReader(keyRangeReader, kvRangeReader); - reader.close(); - verify(keyRangeIterator, never()).close(); - } - - @Test - public void closeItr() { - IKVCloseableReader reader = new KVReader(keyRangeReader, kvRangeReader); - IKVIterator iterator = reader.iterator(); - reader.close(); - verify(keyRangeIterator).close(); - } -} diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/hinter/LoadRecorderTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/hinter/LoadRecorderTest.java index f3391c94f..92343b4ee 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/hinter/LoadRecorderTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/hinter/LoadRecorderTest.java @@ -23,12 +23,11 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import com.google.protobuf.ByteString; +import java.util.function.Supplier; import org.apache.bifromq.basekv.MockableTest; -import org.apache.bifromq.basekv.store.api.IKVLoadRecord; import org.apache.bifromq.basekv.store.range.IKVLoadRecorder; import org.apache.bifromq.basekv.store.range.KVLoadRecorder; -import com.google.protobuf.ByteString; -import java.util.function.Supplier; import org.mockito.Mock; 
import org.testng.annotations.Test; diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/hinter/QueryKVIOBasedSplitHinterTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/hinter/QueryKVIOBasedSplitHinterTest.java index bbad4532c..c78244a65 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/hinter/QueryKVIOBasedSplitHinterTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/hinter/QueryKVIOBasedSplitHinterTest.java @@ -27,14 +27,13 @@ import static org.testng.Assert.assertSame; import static org.testng.Assert.assertTrue; +import com.google.protobuf.ByteString; +import java.time.Duration; +import java.util.function.Supplier; import org.apache.bifromq.basekv.MockableTest; import org.apache.bifromq.basekv.proto.SplitHint; import org.apache.bifromq.basekv.store.range.IKVLoadRecorder; import org.apache.bifromq.basekv.store.range.KVLoadRecorder; -import com.google.protobuf.ByteString; -import java.time.Duration; -import java.util.Optional; -import java.util.function.Supplier; import org.mockito.Mock; import org.testng.annotations.Test; @@ -45,7 +44,7 @@ public class QueryKVIOBasedSplitHinterTest extends MockableTest { @Test public void noLoadRecorded() { QueryKVLoadBasedSplitHinter testEstimator = - new QueryKVLoadBasedSplitHinter(Duration.ofSeconds(5), Optional::of); + new QueryKVLoadBasedSplitHinter(Duration.ofSeconds(5)); SplitHint hint = testEstimator.estimate(); assertFalse(hint.hasSplitKey()); } @@ -53,7 +52,7 @@ public void noLoadRecorded() { @Test public void hintMemoization() { QueryKVLoadBasedSplitHinter estimator = - new QueryKVLoadBasedSplitHinter(nanoSource, Duration.ofSeconds(5), Optional::of); + new QueryKVLoadBasedSplitHinter(nanoSource, Duration.ofSeconds(5)); long now = 0L; // track enough records for (int i = 0; i < 11; i++) { @@ -76,7 +75,7 @@ public void hintMemoization() { @Test 
public void trackClearExpiredSlots() { QueryKVLoadBasedSplitHinter estimator = - new QueryKVLoadBasedSplitHinter(nanoSource, Duration.ofSeconds(1), Optional::of); + new QueryKVLoadBasedSplitHinter(nanoSource, Duration.ofSeconds(1)); long now = 0L; for (int i = 0; i < 11; i++) { when(nanoSource.get()).thenReturn(now); diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/hinter/RecordingWindowSlotTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/hinter/RecordingWindowSlotTest.java index 059f6d9d3..741ed68ae 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/hinter/RecordingWindowSlotTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/range/hinter/RecordingWindowSlotTest.java @@ -23,12 +23,11 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.MockableTest; -import org.apache.bifromq.basekv.store.api.IKVLoadRecord; -import org.apache.bifromq.basekv.store.range.KVLoadRecorder; import com.google.protobuf.ByteString; import java.util.Collections; import java.util.function.Supplier; +import org.apache.bifromq.basekv.MockableTest; +import org.apache.bifromq.basekv.store.range.KVLoadRecorder; import org.mockito.Mock; import org.testng.annotations.Test; diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStoreEngineTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStoreEngineTest.java index e646ac836..e42c566dd 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStoreEngineTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStoreEngineTest.java @@ -14,26 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either 
express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.wal; +import static org.apache.bifromq.basekv.localengine.StructUtil.toValue; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; -import org.apache.bifromq.basekv.MockableTest; -import org.apache.bifromq.basekv.TestUtil; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngineConfigurator; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.raft.IRaftStateStore; -import org.apache.bifromq.basekv.raft.proto.ClusterConfig; -import org.apache.bifromq.basekv.raft.proto.LogEntry; -import org.apache.bifromq.basekv.raft.proto.Snapshot; -import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; import java.io.File; import java.lang.reflect.Method; import java.nio.file.Files; @@ -42,22 +36,31 @@ import java.util.Collections; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.MockableTest; +import org.apache.bifromq.basekv.TestUtil; +import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.raft.IRaftStateStore; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; +import org.apache.bifromq.basekv.raft.proto.LogEntry; +import org.apache.bifromq.basekv.raft.proto.Snapshot; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.testng.annotations.Test; @Slf4j public class KVRangeWALStoreEngineTest extends MockableTest { private static final String DB_NAME = "testDB"; private static final 
String DB_CHECKPOINT_DIR = "testDB_cp"; - private String dbPath; - private RocksDBWALableKVEngineConfigurator engineConfigurator; public Path dbRootDir; + private String dbPath; + private Struct engineConf; @SneakyThrows @Override protected void doSetup(Method method) { dbRootDir = Files.createTempDirectory(""); dbPath = Paths.get(dbRootDir.toString(), DB_NAME).toString(); - engineConfigurator = RocksDBWALableKVEngineConfigurator.builder().dbRootDir(dbPath).build(); + engineConf = RocksDBDefaultConfigs.WAL.toBuilder().putFields(DB_ROOT_DIR, toValue(dbPath)).build(); } protected void doTearDown(Method method) { @@ -71,9 +74,9 @@ protected void doTearDown(Method method) { public void startAndStop() { try { KVRangeWALStorageEngine stateStorageEngine = - new KVRangeWALStorageEngine("testcluster", null, engineConfigurator); + new KVRangeWALStorageEngine("testcluster", null, "rocksdb", engineConf); stateStorageEngine.start(); - if (engineConfigurator != null) { + if (engineConf != null) { assertTrue((new File(dbPath)).isDirectory()); } assertTrue(stateStorageEngine.allKVRangeIds().isEmpty()); @@ -89,7 +92,7 @@ public void newRaftStateStorage() { try { KVRangeId testId = KVRangeIdUtil.generate(); KVRangeWALStorageEngine stateStorageEngine = - new KVRangeWALStorageEngine("testcluster", null, engineConfigurator); + new KVRangeWALStorageEngine("testcluster", null, "rocksdb", engineConf); stateStorageEngine.start(); Snapshot snapshot = Snapshot.newBuilder() .setIndex(0) @@ -122,7 +125,7 @@ public void loadExistingRaftStateStorage() { KVRangeId testId1 = KVRangeIdUtil.generate(); KVRangeId testId2 = KVRangeIdUtil.next(testId1); KVRangeWALStorageEngine stateStorageEngine = - new KVRangeWALStorageEngine("testcluster", null, engineConfigurator); + new KVRangeWALStorageEngine("testcluster", null, "rocksdb", engineConf); stateStorageEngine.start(); Snapshot snapshot = Snapshot.newBuilder() .setIndex(0) @@ -141,7 +144,7 @@ public void loadExistingRaftStateStorage() { 
assertEquals(walStore1.lastIndex(), 1); stateStorageEngine.stop(); - stateStorageEngine = new KVRangeWALStorageEngine("testcluster", null, engineConfigurator); + stateStorageEngine = new KVRangeWALStorageEngine("testcluster", null, "rocksdb", engineConf); stateStorageEngine.start(); assertEquals(stateStorageEngine.allKVRangeIds().size(), 2); IRaftStateStore stateStorage = stateStorageEngine.get(testId1); @@ -158,7 +161,7 @@ public void destroyRaftStateStorage() { KVRangeId testId1 = KVRangeIdUtil.generate(); KVRangeId testId2 = KVRangeIdUtil.next(testId1); KVRangeWALStorageEngine stateStorageEngine = - new KVRangeWALStorageEngine("testcluster", null, engineConfigurator); + new KVRangeWALStorageEngine("testcluster", null, "rocksdb", engineConf); stateStorageEngine.start(); Snapshot snapshot = Snapshot.newBuilder() .setIndex(0) @@ -175,7 +178,7 @@ public void destroyRaftStateStorage() { assertTrue(stateStorageEngine.has(testId2)); stateStorageEngine.stop(); - stateStorageEngine = new KVRangeWALStorageEngine("testcluster", null, engineConfigurator); + stateStorageEngine = new KVRangeWALStorageEngine("testcluster", null, "rocksdb", engineConf); stateStorageEngine.start(); assertEquals(stateStorageEngine.allKVRangeIds().size(), 1); assertTrue(stateStorageEngine.has(testId2)); @@ -185,7 +188,7 @@ public void destroyRaftStateStorage() { public void destroyAndCreate() { KVRangeId testId1 = KVRangeIdUtil.generate(); KVRangeWALStorageEngine stateStorageEngine = - new KVRangeWALStorageEngine("testcluster", null, engineConfigurator); + new KVRangeWALStorageEngine("testcluster", null, "rocksdb", engineConf); stateStorageEngine.start(); Snapshot snapshot = Snapshot.newBuilder() .setIndex(0) diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStoreTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStoreTest.java index 1ed25dea3..847ec6708 100644 --- 
a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStoreTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALStoreTest.java @@ -14,38 +14,41 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.store.wal; +import static org.apache.bifromq.basekv.localengine.StructUtil.toValue; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; + +import com.google.protobuf.Struct; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import org.apache.bifromq.basekv.TestUtil; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngineConfigurator; +import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs; import org.apache.bifromq.basekv.raft.BasicStateStoreTest; import org.apache.bifromq.basekv.raft.IRaftStateStore; import org.apache.bifromq.basekv.raft.proto.Snapshot; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; public class KVRangeWALStoreTest extends BasicStateStoreTest { private static final String DB_NAME = "testDB"; private static final String DB_CHECKPOINT_DIR = "testDB_cp"; - private KVRangeWALStorageEngine stateStorageEngine; public Path dbRootDir; + private KVRangeWALStorageEngine stateStorageEngine; @BeforeMethod public void setup() throws IOException { - RocksDBWALableKVEngineConfigurator walConfigurator; dbRootDir = Files.createTempDirectory(""); - walConfigurator = RocksDBWALableKVEngineConfigurator.builder() - 
.dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME).toString()) + Struct walConf = RocksDBDefaultConfigs.WAL.toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_NAME).toString())) .build(); - stateStorageEngine = new KVRangeWALStorageEngine("testcluster", null, walConfigurator); + stateStorageEngine = new KVRangeWALStorageEngine("testcluster", null, "rocksdb", walConf); stateStorageEngine.start(); } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscriptionTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscriptionTest.java index ee1e984a4..85fad8cfd 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscriptionTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscriptionTest.java @@ -53,6 +53,7 @@ import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.MockableTest; import org.apache.bifromq.basekv.proto.KVRangeSnapshot; +import org.apache.bifromq.basekv.raft.ILogEntryIterator; import org.apache.bifromq.basekv.raft.IRaftNode; import org.apache.bifromq.basekv.raft.event.CommitEvent; import org.apache.bifromq.basekv.raft.proto.LogEntry; @@ -76,6 +77,29 @@ public class KVRangeWALSubscriptionTest extends MockableTest { private IRaftNode.IAfterInstalledCallback afterInstalled; private ExecutorService executor; + /** + * Build a closeable iterator for log entries. + */ + private static ILogEntryIterator it(LogEntry... 
entries) { + Iterator delegate = Iterators.forArray(entries); + return new ILogEntryIterator() { + @Override + public void close() { + // no-op for unit tests + } + + @Override + public boolean hasNext() { + return delegate.hasNext(); + } + + @Override + public LogEntry next() { + return delegate.next(); + } + }; + } + protected void doSetup(Method method) { executor = Executors.newSingleThreadScheduledExecutor(); commitIndexSource = BehaviorSubject.create(); @@ -95,7 +119,7 @@ protected void doTearDown(Method method) { public void retrieveFailAndRetry() { when(wal.retrieveCommitted(0, maxSize)).thenReturn( CompletableFuture.failedFuture(new RuntimeException("For Testing")), - CompletableFuture.completedFuture(Iterators.forArray(LogEntry.newBuilder() + CompletableFuture.completedFuture(it(LogEntry.newBuilder() .setTerm(0) .setIndex(0) .build()))); @@ -118,7 +142,7 @@ public void retrieveFailAndRetry() { public void NoRetryWhenIndexOutOfBound() { when(wal.retrieveCommitted(0, maxSize)).thenReturn( CompletableFuture.failedFuture(new IndexOutOfBoundsException("For Testing")), - CompletableFuture.completedFuture(Iterators.forArray(LogEntry.newBuilder() + CompletableFuture.completedFuture(it(LogEntry.newBuilder() .setTerm(0) .setIndex(0) .build()))); @@ -134,7 +158,7 @@ public void NoRetryWhenIndexOutOfBound() { public void stopRetryWhenStop() { CountDownLatch latch = new CountDownLatch(2); when(wal.retrieveCommitted(0, maxSize)) - .thenAnswer((Answer>>) invocationOnMock -> { + .thenAnswer((Answer>) invocationOnMock -> { latch.countDown(); return CompletableFuture.failedFuture( new IllegalArgumentException()); @@ -152,7 +176,7 @@ public void stopRetryWhenStop() { @Test public void reapplyLog() { when(wal.retrieveCommitted(0, maxSize)).thenReturn(CompletableFuture.completedFuture( - Iterators.forArray( + it( LogEntry.newBuilder().setTerm(0).setIndex(0).build(), LogEntry.newBuilder().setTerm(0).setIndex(1).build())) ); @@ -185,7 +209,7 @@ public void reapplyLog() { @Test 
public void cancelApplyLogWhenSnapshot() { when(wal.retrieveCommitted(0, maxSize)).thenReturn(CompletableFuture.completedFuture( - Iterators.forArray( + it( LogEntry.newBuilder().setTerm(0).setIndex(0).build(), LogEntry.newBuilder().setTerm(0).setIndex(1).build())) ); @@ -210,7 +234,7 @@ public void cancelApplyLogWhenSnapshot() { @Test public void cancelReapplyWhenSnapshot() { when(wal.retrieveCommitted(0, maxSize)).thenReturn(CompletableFuture.completedFuture( - Iterators.forArray( + it( LogEntry.newBuilder().setTerm(0).setIndex(0).build(), LogEntry.newBuilder().setTerm(0).setIndex(1).build())) ); @@ -287,7 +311,7 @@ public void applyLogsAndSnapshot() { LogEntry entry1 = LogEntry.newBuilder().setTerm(0).setIndex(0).build(); LogEntry entry2 = LogEntry.newBuilder().setTerm(0).setIndex(1).build(); when(wal.retrieveCommitted(0, maxSize)) - .thenReturn(CompletableFuture.completedFuture(Iterators.forArray(entry1, entry2))); + .thenReturn(CompletableFuture.completedFuture(it(entry1, entry2))); CountDownLatch latch = new CountDownLatch(1); CompletableFuture applyLogFuture1 = new CompletableFuture<>(); when(subscriber.apply(eq(entry1), anyBoolean())) @@ -312,7 +336,7 @@ public void applyLogsAndSnapshot() { public void leaderIdentitySegmentation() { // Logs: 0,1,2,3; Commits: (1,true) then (3,false) when(wal.retrieveCommitted(eq(0L), eq(maxSize))).thenReturn( - CompletableFuture.completedFuture(Iterators.forArray( + CompletableFuture.completedFuture(it( LogEntry.newBuilder().setTerm(0).setIndex(0).build(), LogEntry.newBuilder().setTerm(0).setIndex(1).build(), LogEntry.newBuilder().setTerm(0).setIndex(2).build(), @@ -355,14 +379,14 @@ public void leaderIdentitySegmentation() { public void notFetchBeyondObservedCommit() { // Logs: 0,1,2; only commit up to 1, then later commit 2 when(wal.retrieveCommitted(eq(0L), eq(maxSize))).thenReturn( - CompletableFuture.completedFuture(Iterators.forArray( + CompletableFuture.completedFuture(it( 
LogEntry.newBuilder().setTerm(0).setIndex(0).build(), LogEntry.newBuilder().setTerm(0).setIndex(1).build(), LogEntry.newBuilder().setTerm(0).setIndex(2).build() )) ); when(wal.retrieveCommitted(eq(2L), eq(maxSize))).thenReturn( - CompletableFuture.completedFuture(Iterators.forArray( + CompletableFuture.completedFuture(it( LogEntry.newBuilder().setTerm(0).setIndex(2).build() )) ); @@ -404,17 +428,17 @@ public void ignoreOldCommitDoesNotFetch() { // Apply 0..2 first, then send an older commit(1), no extra fetch should happen AtomicInteger retrieveCount = new AtomicInteger(); when(wal.retrieveCommitted(org.mockito.ArgumentMatchers.anyLong(), eq(maxSize))).thenAnswer( - (Answer>>) inv -> { + (Answer>) inv -> { long from = inv.getArgument(0); retrieveCount.incrementAndGet(); if (from == 0L) { - return CompletableFuture.completedFuture(Iterators.forArray( + return CompletableFuture.completedFuture(it( LogEntry.newBuilder().setTerm(0).setIndex(0).build(), LogEntry.newBuilder().setTerm(0).setIndex(1).build(), LogEntry.newBuilder().setTerm(0).setIndex(2).build() )); } - return CompletableFuture.completedFuture(Iterators.forArray()); + return CompletableFuture.completedFuture(it()); }); when(subscriber.apply(any(LogEntry.class), anyBoolean())) .thenReturn(CompletableFuture.completedFuture(null)); @@ -438,9 +462,9 @@ public void commitArrivesDuringFetchAndContinuesNextRound() { // First round: commit(1,true) => apply 0,1 only; Second round: commit(3,false) => apply 2,3 Map calls = new HashMap<>(); when(wal.retrieveCommitted(eq(0L), eq(maxSize))).thenAnswer( - (Answer>>) inv -> { + (Answer>) inv -> { calls.computeIfAbsent(0L, k -> new AtomicInteger()).incrementAndGet(); - return CompletableFuture.completedFuture(Iterators.forArray( + return CompletableFuture.completedFuture(it( LogEntry.newBuilder().setTerm(0).setIndex(0).build(), LogEntry.newBuilder().setTerm(0).setIndex(1).build(), LogEntry.newBuilder().setTerm(0).setIndex(2).build(), @@ -448,9 +472,9 @@ public void 
commitArrivesDuringFetchAndContinuesNextRound() { )); }); when(wal.retrieveCommitted(eq(2L), eq(maxSize))).thenAnswer( - (Answer>>) inv -> { + (Answer>) inv -> { calls.computeIfAbsent(2L, k -> new AtomicInteger()).incrementAndGet(); - return CompletableFuture.completedFuture(Iterators.forArray( + return CompletableFuture.completedFuture(it( LogEntry.newBuilder().setTerm(0).setIndex(2).build(), LogEntry.newBuilder().setTerm(0).setIndex(3).build() )); @@ -506,13 +530,13 @@ public void snapshotResetsIndexAndClearsPending() { AtomicInteger calledFrom6 = new AtomicInteger(); // Default: for non-6 fromIndex, return empty iterator to avoid NPE/stall before snapshot finishes when(wal.retrieveCommitted(org.mockito.ArgumentMatchers.longThat(v -> v != 6L), eq(maxSize))) - .thenAnswer((Answer>>) inv -> - CompletableFuture.completedFuture(Iterators.forArray()) + .thenAnswer((Answer>) inv -> + CompletableFuture.completedFuture(it()) ); when(wal.retrieveCommitted(eq(6L), eq(maxSize))).thenAnswer( - (Answer>>) inv -> { + (Answer>) inv -> { calledFrom6.incrementAndGet(); - return CompletableFuture.completedFuture(Iterators.forArray( + return CompletableFuture.completedFuture(it( LogEntry.newBuilder().setTerm(0).setIndex(6).build() )); }); @@ -549,14 +573,14 @@ public void snapshotResetsIndexAndClearsPending() { public void sameLeaderCommitCompression() { // Logs: 0,1,2; Commits: (1,true) then (2,true). All applies with true. 
when(wal.retrieveCommitted(eq(0L), eq(maxSize))).thenReturn( - CompletableFuture.completedFuture(Iterators.forArray( + CompletableFuture.completedFuture(it( LogEntry.newBuilder().setTerm(0).setIndex(0).build(), LogEntry.newBuilder().setTerm(0).setIndex(1).build(), LogEntry.newBuilder().setTerm(0).setIndex(2).build() )) ); when(wal.retrieveCommitted(eq(2L), eq(maxSize))).thenReturn( - CompletableFuture.completedFuture(Iterators.forArray( + CompletableFuture.completedFuture(it( LogEntry.newBuilder().setTerm(0).setIndex(2).build() )) ); @@ -585,7 +609,7 @@ public void sameLeaderCommitCompression() { public void startWithCustomLastFetchedIndex() { // Start from lastFetchedIndex=10; after commit(12,false), fetch from 11 and apply 11,12 with false. when(wal.retrieveCommitted(eq(11L), eq(maxSize))).thenReturn( - CompletableFuture.completedFuture(Iterators.forArray( + CompletableFuture.completedFuture(it( LogEntry.newBuilder().setTerm(0).setIndex(11).build(), LogEntry.newBuilder().setTerm(0).setIndex(12).build() )) diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALTest.java index b44ed7857..526229a40 100644 --- a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALTest.java +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/KVRangeWALTest.java @@ -35,7 +35,6 @@ import io.reactivex.rxjava3.observers.TestObserver; import java.lang.reflect.Method; import java.util.Collections; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; @@ -49,6 +48,7 @@ import org.apache.bifromq.basekv.proto.KVRangeCommand; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeSnapshot; +import org.apache.bifromq.basekv.raft.ILogEntryIterator; import org.apache.bifromq.basekv.raft.IRaftNode; 
import org.apache.bifromq.basekv.raft.IRaftStateStore; import org.apache.bifromq.basekv.raft.InMemoryStateStore; @@ -211,7 +211,7 @@ public void testProposeOnceButFailed() { } private static class InMemoryKVRangeWALStore implements IKVRangeWALStore { - private IRaftStateStore delegate; + private final IRaftStateStore delegate; InMemoryKVRangeWALStore(String replicaId) { delegate = new InMemoryStateStore(replicaId, Snapshot.newBuilder() @@ -291,7 +291,7 @@ public Optional entryAt(long index) { } @Override - public Iterator entries(long lo, long hi, long maxSize) { + public ILogEntryIterator entries(long lo, long hi, long maxSize) { return delegate.entries(lo, hi, maxSize); } diff --git a/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/LogEntryIteratorTest.java b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/LogEntryIteratorTest.java new file mode 100644 index 000000000..f787f01da --- /dev/null +++ b/base-kv/base-kv-store-server/src/test/java/org/apache/bifromq/basekv/store/wal/LogEntryIteratorTest.java @@ -0,0 +1,354 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.basekv.store.wal; + +import static org.apache.bifromq.basekv.store.wal.KVRangeWALKeys.logEntryKey; +import static org.testng.Assert.assertEquals; + +import com.google.protobuf.ByteString; +import io.reactivex.rxjava3.core.Observable; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Objects; +import java.util.Optional; +import java.util.TreeMap; +import java.util.concurrent.CompletableFuture; +import org.apache.bifromq.basekv.localengine.IKVSpaceIterator; +import org.apache.bifromq.basekv.localengine.IKVSpaceRefreshableReader; +import org.apache.bifromq.basekv.localengine.IKVSpaceWriter; +import org.apache.bifromq.basekv.localengine.IWALableKVSpace; +import org.apache.bifromq.basekv.localengine.KVSpaceDescriptor; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.raft.proto.LogEntry; +import org.apache.bifromq.basekv.store.exception.KVRangeStoreException; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class LogEntryIteratorTest { + private static void put(InMemWALSpace space, int infix, long index, int dataSize) { + LogEntry entry = LogEntry.newBuilder() + .setTerm(1) + .setIndex(index) + .setData(ByteString.copyFrom(new byte[dataSize])) + .build(); + space.putRaw(logEntryKey(infix, index), entry.toByteString()); + } + + @Test + public void testBoundaryNotCrossInfix() { + InMemWALSpace space = new InMemWALSpace("t"); + int infix0 = 0; + int infix1 = 1; + // infix=0 entries + put(space, infix0, 1, 10); + put(space, infix0, 2, 10); + // infix=1 entries + put(space, infix1, 1, 10); + put(space, infix1, 2, 10); + + try (LogEntryIterator itr = new LogEntryIterator(space, 1, 10, Long.MAX_VALUE, infix0)) { + List indexes = new ArrayList<>(); + while (itr.hasNext()) { + indexes.add(itr.next().getIndex()); + } + // Should only read indices from infix0 + assertEquals(indexes, List.of(1L, 2L)); + } + } + + 
@Test + public void testMaxSizeTooSmallReturnsAtLeastOne() { + InMemWALSpace space = new InMemWALSpace("t"); + int infix = 0; + put(space, infix, 1, 64); + put(space, infix, 2, 64); + + try (LogEntryIterator itr = new LogEntryIterator(space, 1, 3, 1, infix)) { + List indexes = new ArrayList<>(); + while (itr.hasNext()) { + indexes.add(itr.next().getIndex()); + } + // At least the first entry should be returned + assertEquals(indexes, List.of(1L)); + } + } + + @Test + public void testMaxSizeMayExceedTotal() { + InMemWALSpace space = new InMemWALSpace("t"); + int infix = 0; + put(space, infix, 1, 60); + put(space, infix, 2, 60); + + try (LogEntryIterator itr = new LogEntryIterator(space, 1, 3, 100, infix)) { + List indexes = new ArrayList<>(); + while (itr.hasNext()) { + indexes.add(itr.next().getIndex()); + } + // Two entries returned even though 60+60 > 100 + assertEquals(indexes, List.of(1L, 2L)); + } + } + + @Test + public void testEndIndexExclusive() { + InMemWALSpace space = new InMemWALSpace("t"); + int infix = 0; + put(space, infix, 1, 10); + put(space, infix, 2, 10); + put(space, infix, 3, 10); + put(space, infix, 4, 10); + + try (LogEntryIterator itr = new LogEntryIterator(space, 2, 4, Long.MAX_VALUE, infix)) { + List indexes = new ArrayList<>(); + while (itr.hasNext()) { + indexes.add(itr.next().getIndex()); + } + assertEquals(indexes, List.of(2L, 3L)); + } + } + + @Test(expectedExceptions = KVRangeStoreException.class) + public void testCorruptedValueThrows() { + InMemWALSpace space = new InMemWALSpace("t"); + int infix = 0; + // good entry + put(space, infix, 1, 10); + // corrupted bytes that cannot be parsed as a valid protobuf (malformed varint) + space.putRaw(logEntryKey(infix, 2), ByteString.copyFrom(new byte[] {(byte) 0xFF})); + + try (LogEntryIterator itr = new LogEntryIterator(space, 1, 3, Long.MAX_VALUE, infix)) { + // first ok + Assert.assertTrue(itr.hasNext()); + LogEntry e1 = itr.next(); + assertEquals(e1.getIndex(), 1L); + // second should 
throw + itr.next(); + } + } + + private static class InMemWALSpace implements IWALableKVSpace { + private final String id; + private final NavigableMap store; + + InMemWALSpace(String id) { + this.id = id; + // Use unsigned lexicographical comparator to align with ByteString ordering in production. + this.store = new TreeMap<>(ByteString.unsignedLexicographicalComparator()); + } + + void putRaw(ByteString key, ByteString value) { + store.put(Objects.requireNonNull(key), Objects.requireNonNull(value)); + } + + @Override + public CompletableFuture flush() { + // Not used in tests + return CompletableFuture.completedFuture(System.nanoTime()); + } + + @Override + public IKVSpaceRefreshableReader reader() { + // For IWALableKVSpace, the reader() is inherited from IKVSpace (refreshable). + return new Reader(); + } + + @Override + public IKVSpaceWriter toWriter() { + throw new UnsupportedOperationException("Not needed in tests"); + } + + @Override + public String id() { + return id; + } + + @Override + public Observable> metadata() { + return Observable.never(); + } + + @Override + public KVSpaceDescriptor describe() { + return new KVSpaceDescriptor(id, Map.of()); + } + + @Override + public void open() { + // no-op + } + + @Override + public void close() { + // no-op + } + + @Override + public void destroy() { + store.clear(); + } + + @Override + public long size() { + return store.size(); + } + + /** + * Iterator view over a navigable map with ByteString keys. 
+ */ + private static class IteratorView implements IKVSpaceIterator { + private final NavigableMap view; + private Map.Entry current; + + IteratorView(NavigableMap view) { + this.view = view; + seekToFirst(); + } + + @Override + public ByteString key() { + return current.getKey(); + } + + @Override + public ByteString value() { + return current.getValue(); + } + + @Override + public boolean isValid() { + return current != null; + } + + @Override + public void next() { + if (current == null) { + return; + } + current = view.higherEntry(current.getKey()); + } + + @Override + public void prev() { + if (current == null) { + return; + } + current = view.lowerEntry(current.getKey()); + } + + @Override + public void seekToFirst() { + current = view.isEmpty() ? null : view.firstEntry(); + } + + @Override + public void seekToLast() { + current = view.isEmpty() ? null : view.lastEntry(); + } + + @Override + public void seek(ByteString target) { + current = view.ceilingEntry(target); + } + + @Override + public void seekForPrev(ByteString target) { + current = view.floorEntry(target); + } + + @Override + public void close() { + current = null; + } + } + + private class Reader implements IKVSpaceRefreshableReader { + @Override + public Optional metadata(ByteString metaKey) { + return Optional.empty(); + } + + @Override + public boolean exist(ByteString key) { + return store.containsKey(key); + } + + @Override + public Optional get(ByteString key) { + return Optional.ofNullable(store.get(key)); + } + + @Override + public IKVSpaceIterator newIterator() { + return new IteratorView(store); + } + + @Override + public IKVSpaceIterator newIterator(Boundary subBoundary) { + // Apply [start, end) semantics consistent with production readers. 
+ NavigableMap view = store; + if (subBoundary.hasStartKey() && subBoundary.hasEndKey()) { + view = store.subMap(subBoundary.getStartKey(), true, subBoundary.getEndKey(), false); + } else if (subBoundary.hasStartKey()) { + view = store.tailMap(subBoundary.getStartKey(), true); + } else if (subBoundary.hasEndKey()) { + view = store.headMap(subBoundary.getEndKey(), false); + } + return new IteratorView(view); + } + + @Override + public long size(Boundary boundary) { + ByteString start = boundary.hasStartKey() ? boundary.getStartKey() : null; + ByteString end = boundary.hasEndKey() ? boundary.getEndKey() : null; + NavigableMap view = store; + if (start != null && end != null) { + view = store.subMap(start, true, end, false); + } else if (start != null) { + view = store.tailMap(start, true); + } else if (end != null) { + view = store.headMap(end, false); + } + long total = 0; + for (ByteString v : view.values()) { + total += v.size(); + } + return total; + } + + @Override + public String id() { + return id; + } + + @Override + public void close() { + // no-op + } + + @Override + public void refresh() { + // no-op for in-memory + } + } + } +} diff --git a/base-kv/pom.xml b/base-kv/pom.xml index fdcf2baae..ff1378f20 100644 --- a/base-kv/pom.xml +++ b/base-kv/pom.xml @@ -35,12 +35,16 @@ base-kv-type-proto base-kv-raft-type base-kv-raft - base-kv-local-engine + base-kv-local-engine-spi + base-kv-local-engine-rocksdb + base-kv-local-engine-memory base-kv-store-rpc-definition base-kv-store-client + base-kv-store-coproc-api base-kv-store-server base-kv-store-balance-controller base-kv-store-balance-spi + base-kv-split-hinter-spi base-kv-meta-service - \ No newline at end of file + diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/BatchCallScheduler.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/BatchCallScheduler.java index bb64ade1a..a30306a84 100644 --- 
a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/BatchCallScheduler.java +++ b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/BatchCallScheduler.java @@ -19,10 +19,6 @@ package org.apache.bifromq.basescheduler; -import org.apache.bifromq.basescheduler.exception.AbortException; -import org.apache.bifromq.basescheduler.exception.BatcherUnavailableException; -import org.apache.bifromq.basescheduler.spi.ICallScheduler; -import org.apache.bifromq.basescheduler.spi.ICapacityEstimator; import com.github.benmanes.caffeine.cache.Caffeine; import com.github.benmanes.caffeine.cache.LoadingCache; import com.github.benmanes.caffeine.cache.RemovalListener; @@ -31,11 +27,15 @@ import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Metrics; import java.lang.reflect.ParameterizedType; +import java.lang.reflect.Type; import java.time.Duration; import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.LongAdder; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basescheduler.exception.AbortException; +import org.apache.bifromq.basescheduler.exception.BatcherUnavailableException; +import org.apache.bifromq.basescheduler.spi.ICallScheduler; /** * The abstract class for batch call scheduler. 
@@ -49,7 +49,6 @@ public abstract class BatchCallScheduler implements IBatchCallScheduler { private static final int BATCHER_EXPIRY_SECONDS = 600; private final ICallScheduler callScheduler; - private final ICapacityEstimator capacityEstimator; private final LoadingCache> batchers; private final LongAdder runningCalls = new LongAdder(); private final Gauge runningCallsGauge; @@ -61,7 +60,6 @@ protected BatchCallScheduler(IBatchCallBuilderFactory - new Batcher<>(name, batchCallFactory.newBuilder(name, k), maxBurstLatency, capacityEstimator)); + .build(k -> new Batcher<>(name, k, + batchCallFactory.newBuilder(name, k), + maxBurstLatency, + CapacityEstimatorFactory.INSTANCE.get(name, k), + BatchCallWeighterFactory.INSTANCE.create(name, getReqType()))); runningCallsGauge = Gauge.builder("batcher.call.running.gauge", runningCalls::sum) .tags("name", name) .register(Metrics.globalRegistry); @@ -118,13 +119,19 @@ public void close() { batchers.asMap().values().stream().map(Batcher::close).toArray(CompletableFuture[]::new)).join(); batchers.invalidateAll(); callScheduler.close(); - capacityEstimator.close(); + CapacityEstimatorFactory.INSTANCE.close(); Metrics.globalRegistry.remove(runningCallsGauge); Metrics.globalRegistry.remove(batcherNumGauge); Metrics.globalRegistry.remove(callSubmitCounter); Metrics.globalRegistry.remove(callSchedCounter); } + @SuppressWarnings("unchecked") + private Class getReqType() { + Type type = ((ParameterizedType) getClass().getGenericSuperclass()).getActualTypeArguments()[0]; + return (Class) type; + } + private String getName() { String typeName = ((ParameterizedType) getClass().getGenericSuperclass()) .getActualTypeArguments()[0] diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/BatchCallWeighterFactory.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/BatchCallWeighterFactory.java new file mode 100644 index 000000000..be6895c70 --- /dev/null +++ 
b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/BatchCallWeighterFactory.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basescheduler; + +import java.util.Map; +import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basehookloader.BaseHookLoader; +import org.apache.bifromq.basescheduler.spi.IBatchCallWeighter; +import org.apache.bifromq.basescheduler.spi.IBatchCallWeighterFactory; + +@Slf4j +class BatchCallWeighterFactory implements IBatchCallWeighterFactory { + public static final IBatchCallWeighterFactory INSTANCE = new BatchCallWeighterFactory(); + + private final IBatchCallWeighterFactory delegate; + + private BatchCallWeighterFactory() { + Map factoryMap = BaseHookLoader.load(IBatchCallWeighterFactory.class); + if (factoryMap.isEmpty()) { + delegate = FallbackFactory.INSTANCE; + } else { + delegate = factoryMap.values().iterator().next(); + if (factoryMap.size() > 1) { + log.warn("Multiple BatchCallWeigher implementations found, the first loaded will be used:{}", + delegate.getClass().getName()); + } + } + } + + @Override + public IBatchCallWeighter create(String name, Class reqType) { + try { + IBatchCallWeighter weighter = 
delegate.create(name, reqType); + if (weighter == null) { + return FallbackFactory.INSTANCE.create(name, reqType); + } + return weighter; + } catch (Throwable e) { + log.error("Failed to create BatchCallWeighter: scheduler={}", name, e); + return FallbackFactory.INSTANCE.create(name, reqType); + } + } + + private static class FallbackWeighter implements IBatchCallWeighter { + private int count = 0; + + @Override + public void add(ReqT req) { + count++; + } + + @Override + public long weight() { + return count; + } + + @Override + public void reset() { + count = 0; + } + } + + private static class FallbackFactory implements IBatchCallWeighterFactory { + private static final IBatchCallWeighterFactory INSTANCE = new FallbackFactory(); + + @Override + public IBatchCallWeighter create(String name, Class reqType) { + return new FallbackWeighter<>(); + } + } +} diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java index 01f0fdff0..64200b734 100644 --- a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java +++ b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java @@ -27,9 +27,7 @@ import io.micrometer.core.instrument.Timer; import java.util.LinkedList; import java.util.Queue; -import java.util.Set; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.RejectedExecutionException; @@ -37,59 +35,94 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basescheduler.exception.BackPressureException; +import 
org.apache.bifromq.basescheduler.spi.IBatchCallWeighter; import org.apache.bifromq.basescheduler.spi.ICapacityEstimator; @Slf4j final class Batcher { + private final BatcherKeyT key; private final IBatchCallBuilder batchCallBuilder; private final Queue> batchPool; private final Queue> callTaskBuffers = new ConcurrentLinkedQueue<>(); private final AtomicReference state = new AtomicReference<>(State.RUNNING); private final AtomicBoolean triggering = new AtomicBoolean(); private final AtomicInteger pipelineDepth = new AtomicInteger(); - private final ICapacityEstimator capacityEstimator; + private final AtomicInteger queuedCallCount = new AtomicInteger(0); + private final AtomicInteger inFlightCallCount = new AtomicInteger(0); + private final ICapacityEstimator capacityEstimator; + private final IBatchCallWeighter batchCallWeighter; private final long maxBurstLatency; private final EMALong emaQueueingTime; - private final Gauge maxPipelineDepthGauge; private final Gauge pipelineDepthGauge; private final Counter dropCounter; private final Timer batchCallTimer; private final Timer batchExecTimer; private final Timer batchBuildTimer; - private final DistributionSummary batchSizeSummary; + private final DistributionSummary batchCountSummary; + private final DistributionSummary batchWeightSizeSummary; private final DistributionSummary queueingTimeSummary; + private final Gauge maxCapacityGauge; + private final Gauge queueingCountGauge; + private final Gauge inflightCountGauge; + private final Gauge inflightWeightGauge; // Future to signal shutdown completion private final CompletableFuture shutdownFuture = new CompletableFuture<>(); - // hold a ref to all running batch calls for debugging - private final Set> runningBatchCalls = ConcurrentHashMap.newKeySet(); + private final AtomicLong inFlightWeight = new AtomicLong(0L); Batcher(String name, + BatcherKeyT key, IBatchCallBuilder batchCallBuilder, long maxBurstLatency, - ICapacityEstimator capacityEstimator) { + 
ICapacityEstimator capacityEstimator, + IBatchCallWeighter batchCallWeighter) { + this.key = key; this.batchCallBuilder = batchCallBuilder; this.capacityEstimator = capacityEstimator; + this.batchCallWeighter = batchCallWeighter; this.maxBurstLatency = maxBurstLatency; this.batchPool = new ConcurrentLinkedDeque<>(); this.emaQueueingTime = new EMALong(System::nanoTime, 0.1, 0.9, maxBurstLatency); Tags tags = Tags.of("name", name, "key", Integer.toUnsignedString(System.identityHashCode(this))); - maxPipelineDepthGauge = Gauge.builder("batcher.pipeline.max", capacityEstimator::maxPipelineDepth) + pipelineDepthGauge = Gauge.builder("batcher.pipeline.depth", pipelineDepth::get) .tags(tags) .register(Metrics.globalRegistry); - pipelineDepthGauge = Gauge.builder("batcher.pipeline.depth", pipelineDepth::get) + dropCounter = Counter.builder("batcher.call.drop.count") + .tags(tags) + .register(Metrics.globalRegistry); + batchCallTimer = Timer.builder("batcher.call.time") + .tags(tags) + .register(Metrics.globalRegistry); + batchExecTimer = Timer.builder("batcher.exec.time") + .tags(tags) + .register(Metrics.globalRegistry); + batchBuildTimer = Timer.builder("batcher.build.time") + .tags(tags) + .register(Metrics.globalRegistry); + batchCountSummary = DistributionSummary.builder("batcher.batch.count") + .tags(tags) + .register(Metrics.globalRegistry); + batchWeightSizeSummary = DistributionSummary.builder("batcher.batch.size") + .tags(tags) + .register(Metrics.globalRegistry); + queueingTimeSummary = DistributionSummary.builder("batcher.queueing.time") + .tags(tags) + .register(Metrics.globalRegistry); + maxCapacityGauge = Gauge.builder("batcher.capacity.max", () -> capacityEstimator.maxCapacity(key)) + .tags(tags) + .register(Metrics.globalRegistry); + queueingCountGauge = Gauge.builder("batcher.queueing.count", queuedCallCount::get) + .tags(tags) + .register(Metrics.globalRegistry); + inflightCountGauge = Gauge.builder("batcher.inflight.count", inFlightCallCount::get) + 
.tags(tags) + .register(Metrics.globalRegistry); + inflightWeightGauge = Gauge.builder("batcher.inflight.size", inFlightWeight::get) .tags(tags) .register(Metrics.globalRegistry); - dropCounter = Counter.builder("batcher.call.drop.count").tags(tags).register(Metrics.globalRegistry); - batchCallTimer = Timer.builder("batcher.call.time").tags(tags).register(Metrics.globalRegistry); - batchExecTimer = Timer.builder("batcher.exec.time").tags(tags).register(Metrics.globalRegistry); - batchBuildTimer = Timer.builder("batcher.build.time").tags(tags).register(Metrics.globalRegistry); - batchSizeSummary = - DistributionSummary.builder("batcher.batch.size").tags(tags).register(Metrics.globalRegistry); - queueingTimeSummary = - DistributionSummary.builder("batcher.queueing.time").tags(tags).register(Metrics.globalRegistry); } public CompletableFuture submit(BatcherKeyT batcherKey, CallT request) { @@ -97,24 +130,20 @@ public CompletableFuture submit(BatcherKeyT batcherKey, CallT reque return CompletableFuture.failedFuture( new RejectedExecutionException("Batcher has been shut down")); } - if (Math.max(emaQueueingTime.get(), headCallWaitingNanos()) < maxBurstLatency) { - ICallTask callTask = new CallTask<>(batcherKey, request); - boolean offered = callTaskBuffers.offer(callTask); - assert offered; - trigger(); - return callTask.resultPromise(); - } else { + if (emaQueueingTime.get() > maxBurstLatency) { dropCounter.increment(); + if (pipelineDepth.get() == 0 && !callTaskBuffers.isEmpty()) { + trigger(); + } return CompletableFuture.failedFuture(new BackPressureException("Batch call busy")); } - } - private long headCallWaitingNanos() { - ICallTask head = callTaskBuffers.peek(); - if (head != null) { - return System.nanoTime() - head.ts(); - } - return 0; + ICallTask callTask = new CallTask<>(batcherKey, request); + boolean offered = callTaskBuffers.offer(callTask); + assert offered; + queuedCallCount.incrementAndGet(); + trigger(); + return callTask.resultPromise(); } public 
CompletableFuture close() { @@ -128,20 +157,25 @@ private void checkShutdownCompletion() { // If no tasks pending and no pipeline in-flight, complete if (callTaskBuffers.isEmpty() && pipelineDepth.get() == 0) { cleanupMetrics(); + batchCallWeighter.reset(); state.set(State.TERMINATED); shutdownFuture.complete(null); } } private void cleanupMetrics() { - Metrics.globalRegistry.remove(maxPipelineDepthGauge); Metrics.globalRegistry.remove(pipelineDepthGauge); Metrics.globalRegistry.remove(dropCounter); Metrics.globalRegistry.remove(batchCallTimer); Metrics.globalRegistry.remove(batchExecTimer); Metrics.globalRegistry.remove(batchBuildTimer); - Metrics.globalRegistry.remove(batchSizeSummary); + Metrics.globalRegistry.remove(batchCountSummary); + Metrics.globalRegistry.remove(batchWeightSizeSummary); Metrics.globalRegistry.remove(queueingTimeSummary); + Metrics.globalRegistry.remove(maxCapacityGauge); + Metrics.globalRegistry.remove(queueingCountGauge); + Metrics.globalRegistry.remove(inflightCountGauge); + Metrics.globalRegistry.remove(inflightWeightGauge); IBatchCall batchCall; while ((batchCall = batchPool.poll()) != null) { batchCall.destroy(); @@ -152,13 +186,13 @@ private void cleanupMetrics() { private void trigger() { if (triggering.compareAndSet(false, true)) { try { - if (!callTaskBuffers.isEmpty() && pipelineDepth.get() < capacityEstimator.maxPipelineDepth()) { + if (!callTaskBuffers.isEmpty() && capacityEstimator.hasCapacity(inFlightWeight.get(), key)) { batchAndEmit(); } } finally { triggering.set(false); - if (!callTaskBuffers.isEmpty() && pipelineDepth.get() < capacityEstimator.maxPipelineDepth()) { - trigger(); + if (!callTaskBuffers.isEmpty() && capacityEstimator.hasCapacity(inFlightWeight.get(), key)) { + this.trigger(); } } } @@ -168,52 +202,57 @@ private void batchAndEmit() { pipelineDepth.incrementAndGet(); long buildStart = System.nanoTime(); IBatchCall batchCall = borrowBatchCall(); - - int batchSize = 0; - int maxBatchSize = 
capacityEstimator.maxBatchSize(); + int batchedCallNums = 0; LinkedList> batchedTasks = new LinkedList<>(); ICallTask callTask; - while (batchSize < maxBatchSize && (callTask = callTaskBuffers.poll()) != null) { + batchCallWeighter.reset(); + long avail = capacityEstimator.maxCapacity(key); + while (batchCallWeighter.weight() < avail && (callTask = callTaskBuffers.poll()) != null) { + batchCallWeighter.add(callTask.call()); long queueingTime = System.nanoTime() - callTask.ts(); - queueingTimeSummary.record(queueingTime); emaQueueingTime.update(queueingTime); + queueingTimeSummary.record(queueingTime); batchCall.add(callTask); batchedTasks.add(callTask); - batchSize++; + batchedCallNums++; } - batchSizeSummary.record(batchSize); + final long batchWeight = batchCallWeighter.weight(); + queuedCallCount.addAndGet(-batchedCallNums); + inFlightCallCount.addAndGet(batchedCallNums); + batchCountSummary.record(batchedCallNums); + batchWeightSizeSummary.record(batchWeight); long execBegin = System.nanoTime(); batchBuildTimer.record(execBegin - buildStart, TimeUnit.NANOSECONDS); try { - int finalBatchSize = batchSize; + int finalBatchSize = batchedCallNums; + inFlightWeight.addAndGet(batchWeight); CompletableFuture future = batchCall.execute(); - runningBatchCalls.add(future); future - .orTimeout(maxBurstLatency, TimeUnit.NANOSECONDS) // Ensure we don't block indefinitely + .orTimeout(maxBurstLatency, TimeUnit.NANOSECONDS) .whenComplete((v, e) -> { - runningBatchCalls.remove(future); long execEnd = System.nanoTime(); if (e != null) { - if (e instanceof TimeoutException) { + if (e instanceof BackPressureException || e instanceof TimeoutException) { + capacityEstimator.onBackPressure(); batchedTasks.forEach(t -> t.resultPromise() - .completeExceptionally(new BackPressureException("Batch Call timeout", e))); - if (!future.isDone()) { - future.cancel(true); - } + .completeExceptionally(new BackPressureException("Downstream Busy", e))); + returnBatchCall(batchCall, true); } else { 
batchedTasks.forEach(t -> t.resultPromise().completeExceptionally(e)); + returnBatchCall(batchCall, true); } - returnBatchCall(batchCall, true); } else { - long batchCallLatency = execEnd - execBegin; - capacityEstimator.record(finalBatchSize, batchCallLatency); - batchExecTimer.record(batchCallLatency, TimeUnit.NANOSECONDS); + long execLatency = execEnd - execBegin; + batchExecTimer.record(execLatency, TimeUnit.NANOSECONDS); + capacityEstimator.record(batchWeight, execLatency); batchedTasks.forEach(t -> { long callLatency = execEnd - t.ts(); batchCallTimer.record(callLatency, TimeUnit.NANOSECONDS); }); returnBatchCall(batchCall, false); } + inFlightCallCount.addAndGet(-finalBatchSize); + inFlightWeight.addAndGet(-batchWeight); pipelineDepth.getAndDecrement(); // After each completion, check for shutdown if (state.get() == State.SHUTTING_DOWN) { @@ -227,6 +266,9 @@ private void batchAndEmit() { log.error("Batch call failed unexpectedly", e); batchedTasks.forEach(t -> t.resultPromise().completeExceptionally(e)); returnBatchCall(batchCall, true); + // decrease in-flight count by completed size on failure path + inFlightCallCount.addAndGet(-batchedCallNums); + inFlightWeight.addAndGet(-batchWeight); pipelineDepth.getAndDecrement(); if (state.get() == State.SHUTTING_DOWN) { checkShutdownCompletion(); diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/CapacityEstimatorFactory.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/CapacityEstimatorFactory.java index 52bb99f3f..c7664653c 100644 --- a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/CapacityEstimatorFactory.java +++ b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/CapacityEstimatorFactory.java @@ -14,16 +14,16 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basescheduler; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basehookloader.BaseHookLoader; import org.apache.bifromq.basescheduler.spi.ICapacityEstimator; import org.apache.bifromq.basescheduler.spi.ICapacityEstimatorFactory; -import java.util.Map; -import lombok.extern.slf4j.Slf4j; @Slf4j class CapacityEstimatorFactory implements ICapacityEstimatorFactory { @@ -37,38 +37,49 @@ private CapacityEstimatorFactory() { delegate = FallbackFactory.INSTANCE; } else { delegate = factoryMap.values().iterator().next(); + if (factoryMap.size() > 1) { + log.warn("Multiple CapacityEstimatorFactory implementations found, the first loaded will be used:{}", + delegate.getClass().getName()); + } } } @Override - public ICapacityEstimator create(String name) { + public ICapacityEstimator get(String name, BatcherKey batcherKey) { try { - ICapacityEstimator estimator = delegate.create(name); + ICapacityEstimator estimator = delegate.get(name, batcherKey); if (estimator == null) { - return FallbackFactory.INSTANCE.create(name); + return FallbackFactory.INSTANCE.get(name, batcherKey); } return estimator; } catch (Throwable e) { - log.error("Failed to create pipelineDepthEstimator: scheduler={}", name, e); - return FallbackFactory.INSTANCE.create(name); + log.error("Failed to create CapacityEstimator: scheduler={}", name, e); + return FallbackFactory.INSTANCE.get(name, batcherKey); } } - private static class FallbackCapacityEstimator implements ICapacityEstimator { - private static final ICapacityEstimator INSTANCE = new FallbackCapacityEstimator(); + @Override + public void close() { + delegate.close(); + } + + private static class FallbackCapacityEstimator implements ICapacityEstimator { @Override - public void record(int batchSize, long latencyNs) { + public void record(long weightedSize, long latencyNs) { + } + + public boolean hasCapacity(long inflight, BatcherKey key) { + return inflight <= 0; } @Override - public 
int maxPipelineDepth() { - return 1; + public long maxCapacity(BatcherKey key) { + return Long.MAX_VALUE; } @Override - public int maxBatchSize() { - return Integer.MAX_VALUE; + public void onBackPressure() { } } @@ -76,8 +87,8 @@ private static class FallbackFactory implements ICapacityEstimatorFactory { private static final ICapacityEstimatorFactory INSTANCE = new FallbackFactory(); @Override - public ICapacityEstimator create(String name) { - return FallbackCapacityEstimator.INSTANCE; + public ICapacityEstimator get(String name, BatcherKey batcherKey) { + return new FallbackCapacityEstimator<>(); } } } diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/IBatchCallWeighter.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/IBatchCallWeighter.java new file mode 100644 index 000000000..7efc34e07 --- /dev/null +++ b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/IBatchCallWeighter.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basescheduler.spi; + +/** + * SPI interface for estimating the weighted size of a BatchCall. 
+ * + * @param the batched call type + */ +public interface IBatchCallWeighter { + /** + * Will be invoked by batcher when building batch call. + * + * @param req the batched call + */ + void add(ReqT req); + + /** + * The accumulated weighted size of the BatchCall. + * + * @return the weighted size + */ + long weight(); + + /** + * Reset the weighter for next use. + */ + void reset(); +} diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/IBatchCallWeighterFactory.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/IBatchCallWeighterFactory.java new file mode 100644 index 000000000..8013e95cc --- /dev/null +++ b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/IBatchCallWeighterFactory.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basescheduler.spi; + +/** + * SPI factory for creating {@link IBatchCallWeighter} instances. + */ +public interface IBatchCallWeighterFactory { + /** + * The factory method to create an instance of {@link IBatchCallWeighter}. 
+ * + * @param name the name of scheduler + * @param reqType the class of batched call + * @return an instance of {@link IBatchCallWeighter} + */ + IBatchCallWeighter create(String name, Class reqType); +} diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/ICapacityEstimator.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/ICapacityEstimator.java index 9bfce886e..96d509006 100644 --- a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/ICapacityEstimator.java +++ b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/ICapacityEstimator.java @@ -14,40 +14,42 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basescheduler.spi; /** - * SPI interface for estimating the capacity of a BatchCall pipeline. + * SPI interface for estimating downstream capacity. */ -public interface ICapacityEstimator { +public interface ICapacityEstimator { /** - * Callback to record the latency of a batch call. + * Record the outcome of a batch call. * - * @param batchSize the size of the batch - * @param latencyNs the latency in nanoseconds + * @param weight the total weight of the batch + * @param latencyNs the execution latency in nanoseconds */ - void record(int batchSize, long latencyNs); + void record(long weight, long latencyNs); /** - * Get the maximum pipeline depth for the BatchCall pipeline. + * Determine if it's allowed to emit batch call for given batcher. * - * @return the maximum pipeline depth + * @param inflightWeight the inflight weight of batch call + * @param batcherKey the key of the batcher + * @return if it's allowed */ - int maxPipelineDepth(); + boolean hasCapacity(long inflightWeight, BatcherKey batcherKey); /** - * Get the maximum batch size for a batch. 
+ * Get the current maximum allowed capacity in weighted size for given batcher. * - * @return the maximum batch size + * @param batcherKey the key of the batcher + * @return the capacity budget */ - int maxBatchSize(); + long maxCapacity(BatcherKey batcherKey); /** - * Close the call scheduler. + * Notify estimator that downstream backpressure has been observed. */ - default void close() { - } + void onBackPressure(); } diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/ICapacityEstimatorFactory.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/ICapacityEstimatorFactory.java index 8e04f2e65..c168880d0 100644 --- a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/ICapacityEstimatorFactory.java +++ b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/spi/ICapacityEstimatorFactory.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basescheduler.spi; @@ -27,7 +27,15 @@ public interface ICapacityEstimatorFactory { * The factory method to create an instance of {@link ICapacityEstimator}. * * @param name the name of scheduler + * @param batcherKey the key of the batcher * @return an instance of {@link ICapacityEstimator} */ - ICapacityEstimator create(String name); + ICapacityEstimator get(String name, BatcherKey batcherKey); + + /** + * Close the factory. 
+ */ + default void close() { + + } } diff --git a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatcherTest.java b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatcherTest.java index 919d326fe..e9b1acd13 100644 --- a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatcherTest.java +++ b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatcherTest.java @@ -32,10 +32,12 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import org.apache.bifromq.basescheduler.exception.BackPressureException; +import org.apache.bifromq.basescheduler.spi.IBatchCallWeighter; import org.apache.bifromq.basescheduler.spi.ICapacityEstimator; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -44,13 +46,15 @@ public class BatcherTest { private RecordingBatchCallBuilder builder; private TestCapacityEstimator estimator; + private IBatchCallWeighter batchCallWeighter; private Batcher batcher; @BeforeMethod public void setup() { builder = new RecordingBatchCallBuilder(); - estimator = new TestCapacityEstimator(1, 16); - batcher = new Batcher<>("test", builder, Duration.ofSeconds(1).toNanos(), estimator); + estimator = new TestCapacityEstimator(16); + batchCallWeighter = new TestBatchCallWeighter(); + batcher = new Batcher<>("test", 1, builder, Duration.ofSeconds(1).toNanos(), estimator, batchCallWeighter); } @AfterMethod @@ -62,15 +66,18 @@ public void tearDown() { public void submitAcceptsWhenUnderBurstLatency() { // success immediate builder.setSuccessMode(); - estimator.maxBatchSize = 10; - estimator.maxPipelineDepth = 1; + estimator.maxCapacity = 10; int n = 7; List req = new ArrayList<>(); List resp = 
new CopyOnWriteArrayList<>(); List> futures = new ArrayList<>(); for (int i = 0; i < n; i++) { req.add(i); - futures.add(batcher.submit(0, i).whenComplete((v, e) -> {if (e == null) {resp.add(v);}})); + futures.add(batcher.submit(0, i).whenComplete((v, e) -> { + if (e == null) { + resp.add(v); + } + })); } CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join(); assertEquals(resp, req); @@ -79,27 +86,9 @@ public void submitAcceptsWhenUnderBurstLatency() { } @Test - public void submitDropsWhenBurstLatencyZero() { - // rebuild with 0 burst to trigger drop - batcher.close().join(); - builder = new RecordingBatchCallBuilder(); - estimator = new TestCapacityEstimator(1, 16); - batcher = new Batcher<>("test", builder, 0L, estimator); - - CompletableFuture f = batcher.submit(0, 1); - try { - f.join(); - fail(); - } catch (Throwable e) { - assertTrue(e.getCause() instanceof BackPressureException); - } - } - - @Test - public void batching_respectsMaxBatchSize() { + public void batchingRespectsMaxCapacity() { builder.setSuccessMode(); - estimator.maxBatchSize = 3; - estimator.maxPipelineDepth = 1; + estimator.maxCapacity = 3; int n = 7; List> futures = new ArrayList<>(); for (int i = 0; i < n; i++) { @@ -113,38 +102,16 @@ public void batching_respectsMaxBatchSize() { } @Test - public void pipelineDepthLimitsConcurrentExecute() { - batcher.close().join(); - builder = new RecordingBatchCallBuilder(); - estimator = new TestCapacityEstimator(1, 2); - batcher = new Batcher<>("test", builder, Duration.ofSeconds(5).toNanos(), estimator); - builder.setDelaySuccessMode(Duration.ofMillis(200)); - int n = 4; - List> futures = new ArrayList<>(); - for (int i = 0; i < n; i++) { - futures.add(batcher.submit(0, i)); - } - await().atMost(java.time.Duration.ofSeconds(1)).until(() -> builder.executeCount.get() == 1); - assertEquals(builder.executeCount.get(), 1); - await().atMost(java.time.Duration.ofSeconds(2)).until(() -> builder.executeCount.get() == 2); - 
CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join(); - int sum = builder.batchSizes.stream().mapToInt(Integer::intValue).sum(); - assertEquals(sum, n); - assertTrue(builder.batchSizes.stream().allMatch(sz -> sz <= 2)); - } - - @Test - public void timeoutCompletesTasksWithBackPressureException_andCancelsFuture() { - // small burst to trigger timeout + public void timeoutCompletesTasksWithBackPressureExceptionAndCancelsFuture() { batcher.close().join(); builder = new RecordingBatchCallBuilder(); - estimator = new TestCapacityEstimator(1, 2); - batcher = new Batcher<>("test", builder, Duration.ofMillis(20).toNanos(), estimator); + estimator = new TestCapacityEstimator(2); + batcher = new Batcher<>("test", 1, builder, Duration.ofMillis(20).toNanos(), estimator, batchCallWeighter); builder.setHoldMode(); List> futures = new ArrayList<>(); futures.add(batcher.submit(0, 1)); futures.add(batcher.submit(0, 2)); - await().atMost(java.time.Duration.ofSeconds(3)) + await().atMost(Duration.ofSeconds(3)) .until(() -> futures.stream().allMatch(CompletableFuture::isDone)); for (CompletableFuture f : futures) { try { @@ -160,7 +127,7 @@ public void timeoutCompletesTasksWithBackPressureException_andCancelsFuture() { @Test public void executeExceptionPropagatesToAllTasksAndAbortReset() { builder.setFailureMode(new IllegalStateException("failure")); - estimator.maxBatchSize = 3; + estimator.maxCapacity = 3; List> futures = new ArrayList<>(); futures.add(batcher.submit(0, 1)); futures.add(batcher.submit(0, 2)); @@ -168,7 +135,8 @@ public void executeExceptionPropagatesToAllTasksAndAbortReset() { CompletableFuture all = CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)); try { all.join(); - } catch (Throwable ignore) { + } catch (Throwable e) { + // ignore } for (CompletableFuture f : futures) { try { @@ -180,32 +148,6 @@ public void executeExceptionPropagatesToAllTasksAndAbortReset() { assertTrue(builder.resetAbortTrueCount.get() >= 1); } - @Test - 
public void closeCompletesWhenDrained() { - batcher.close().join(); - builder = new RecordingBatchCallBuilder(); - estimator = new TestCapacityEstimator(1, 2); - batcher = new Batcher<>("test", builder, Duration.ofSeconds(5).toNanos(), estimator); - builder.setDelaySuccessMode(Duration.ofMillis(200)); - CompletableFuture f1 = batcher.submit(0, 1); - CompletableFuture f2 = batcher.submit(0, 2); - await().atMost(java.time.Duration.ofSeconds(1)) - .until(() -> builder.executeCount.get() == 1); - CompletableFuture shutdown = batcher.close(); - assertFalse(shutdown.isDone()); - await().atMost(java.time.Duration.ofSeconds(3)).until(shutdown::isDone); - // further submit should be rejected - CompletableFuture f3 = batcher.submit(0, 3); - try { - f3.join(); - } catch (Throwable ex) { - assertTrue(ex.getCause() instanceof java.util.concurrent.RejectedExecutionException); - } - // original tasks should have completed - assertEquals(f1.join(), (Integer) 1); - assertEquals(f2.join(), (Integer) 2); - } - @Test public void closeImmediateWhenIdle() { CompletableFuture shutdown = batcher.close(); @@ -215,15 +157,14 @@ public void closeImmediateWhenIdle() { f.join(); throw new AssertionError("Expected RejectedExecutionException"); } catch (Throwable ex) { - assertTrue(ex.getCause() instanceof java.util.concurrent.RejectedExecutionException); + assertTrue(ex.getCause() instanceof RejectedExecutionException); } } @Test public void batchCallObjectReusedOnSuccess() { builder.setSuccessMode(); - estimator.maxBatchSize = 1; - estimator.maxPipelineDepth = 1; + estimator.maxCapacity = 1; // two sequential singleton batches assertEquals(batcher.submit(0, 1).join(), (Integer) 1); assertEquals(batcher.submit(0, 2).join(), (Integer) 2); @@ -235,15 +176,14 @@ public void batchCallObjectReusedOnSuccess() { @Test public void batchCallObjectReusedOnFailure() { builder.setFailureMode(new RuntimeException("x")); - estimator.maxBatchSize = 1; - estimator.maxPipelineDepth = 1; - // first fails + 
estimator.maxCapacity = 1; CompletableFuture f1 = batcher.submit(0, 1); + await().atMost(Duration.ofSeconds(3)).until(f1::isDone); try { f1.join(); - } catch (Throwable ignore) { + } catch (Throwable e) { + // ignore } - // switch to success and submit again builder.setSuccessMode(); assertEquals(batcher.submit(0, 2).join(), (Integer) 2); // reused same object @@ -252,33 +192,163 @@ public void batchCallObjectReusedOnFailure() { assertTrue(builder.resetAbortFalseCount.get() >= 1); } - private static class TestCapacityEstimator implements ICapacityEstimator { + @Test + public void closeWaitsForInFlightAndCleansResources() { + builder.setHoldMode(); + estimator.maxCapacity = 2; + CompletableFuture f1 = batcher.submit(0, 1); + CompletableFuture f2 = batcher.submit(0, 2); + await().atMost(Duration.ofSeconds(3)).until(() -> !builder.heldFutures.isEmpty()); + + CompletableFuture shutdown = batcher.close(); + assertFalse(shutdown.isDone()); + + // release held batch and complete successfully + builder.releaseHeldSuccess(); + assertEquals(f1.join(), (Integer) 1); + assertEquals(f2.join(), (Integer) 2); + + await().atMost(Duration.ofSeconds(3)).until(shutdown::isDone); + assertTrue(builder.resetAbortFalseCount.get() >= 1); + assertTrue(builder.destroyCount.get() >= 1); + assertEquals(builder.closeCount.get(), 1); + } + + @Test + public void submitRejectedAfterCloseWhileBusy() { + builder.setHoldMode(); + estimator.maxCapacity = 1; + CompletableFuture f = batcher.submit(0, 1); + await().atMost(Duration.ofSeconds(3)) + .until(() -> !builder.heldFutures.isEmpty()); + + CompletableFuture shutdown = batcher.close(); + assertFalse(shutdown.isDone()); + + CompletableFuture rejected = batcher.submit(0, 2); + try { + rejected.join(); + fail(); + } catch (Throwable ex) { + assertTrue(ex.getCause() instanceof RejectedExecutionException); + } + + // cleanup to finish shutdown + builder.releaseHeldSuccess(); + await().atMost(Duration.ofSeconds(3)).until(shutdown::isDone); + 
assertEquals(f.join(), (Integer) 1); + } + + @Test + public void executeThrowsSynchronouslyHandled() { + RuntimeException boom = new RuntimeException("sync"); + builder.setSyncThrowMode(boom); + estimator.maxCapacity = 3; + List> futures = new ArrayList<>(); + futures.add(batcher.submit(0, 1)); + futures.add(batcher.submit(0, 2)); + futures.add(batcher.submit(0, 3)); + // ensure all futures completed to avoid indefinite join + await().atMost(Duration.ofSeconds(3)) + .until(() -> futures.stream().allMatch(CompletableFuture::isDone)); + for (CompletableFuture f : futures) { + try { + f.join(); + fail(); + } catch (Throwable ex) { + assertTrue(ex.getCause() instanceof RuntimeException); + assertEquals(ex.getCause().getMessage(), "sync"); + } + } + assertTrue(builder.resetAbortTrueCount.get() >= 1); + assertEquals(builder.newBatchCallCount.get(), 1); + } + + @Test(enabled = false) + public void weightBasedCapacityRespectedWithHeavierRequests() { + builder.setSuccessMode(); + batcher.close().join(); + builder = new RecordingBatchCallBuilder(); + estimator = new TestCapacityEstimator(3); + batchCallWeighter = new HeavierBatchCallWeighter(); + batcher = new Batcher<>("test", 1, builder, Duration.ofSeconds(1).toNanos(), estimator, batchCallWeighter); + + List> futures = new ArrayList<>(); + futures.add(batcher.submit(0, 1)); + futures.add(batcher.submit(0, 2)); + futures.add(batcher.submit(0, 3)); + CompletableFuture.allOf(futures.toArray(CompletableFuture[]::new)).join(); + + assertTrue(builder.batchSizes.stream().allMatch(sz -> sz <= 1)); + } + + private static class TestCapacityEstimator implements ICapacityEstimator { final AtomicInteger recordCount = new AtomicInteger(); - volatile int maxPipelineDepth; - volatile int maxBatchSize; - volatile int lastRecordedBatchSize; + volatile long maxCapacity; + volatile long lastRecordedWeight; volatile long lastRecordedLatency; - TestCapacityEstimator(int maxPipelineDepth, int maxBatchSize) { - this.maxPipelineDepth = 
maxPipelineDepth; - this.maxBatchSize = maxBatchSize; + TestCapacityEstimator(long maxCapacity) { + this.maxCapacity = maxCapacity; } @Override - public void record(int batchSize, long latencyNs) { - lastRecordedBatchSize = batchSize; + public void record(long weightedSize, long latencyNs) { + lastRecordedWeight = weightedSize; lastRecordedLatency = latencyNs; recordCount.incrementAndGet(); } @Override - public int maxPipelineDepth() { - return maxPipelineDepth; + public boolean hasCapacity(long currentOccupation, Integer key) { + return maxCapacity - currentOccupation > 0; } @Override - public int maxBatchSize() { - return maxBatchSize; + public long maxCapacity(Integer key) { + return maxCapacity; + } + + @Override + public void onBackPressure() { + } + } + + private static class TestBatchCallWeighter implements IBatchCallWeighter { + private int count = 0; + + @Override + public void add(Integer req) { + count++; + } + + @Override + public long weight() { + return count; + } + + @Override + public void reset() { + count = 0; + } + } + + private static class HeavierBatchCallWeighter implements IBatchCallWeighter { + private int weight; + + @Override + public void add(Integer req) { + weight += 2; + } + + @Override + public long weight() { + return weight; + } + + @Override + public void reset() { + weight = 0; } } @@ -287,6 +357,8 @@ private static class RecordingBatchCallBuilder implements IBatchCallBuilder batchSizes = new CopyOnWriteArrayList<>(); final AtomicReference mode = new AtomicReference<>(Mode.SUCCESS); final AtomicReference failure = new AtomicReference<>(new RuntimeException("x")); @@ -306,7 +378,8 @@ void setSuccessMode() { } void setFailureMode(RuntimeException e) { - failure.set(e); mode.set(Mode.FAILURE); + failure.set(e); + mode.set(Mode.FAILURE); } void setHoldMode() { @@ -314,10 +387,31 @@ void setHoldMode() { } void setDelaySuccessMode(Duration d) { - this.delay = d; mode.set(Mode.DELAY); + this.delay = d; + mode.set(Mode.DELAY); + } + + 
void setSyncThrowMode(RuntimeException e) { + failure.set(e); + mode.set(Mode.SYNC_THROW); + } + + void releaseHeldSuccess() { + RecordingBatchCall c; + CompletableFuture f; + while ((c = heldCalls.poll()) != null && (f = heldFutures.poll()) != null) { + c.completeTasksSuccess(); + f.complete(null); + } + } + + @Override + public void close() { + // record builder close + closeCount.incrementAndGet(); } - enum Mode { SUCCESS, FAILURE, HOLD, DELAY } + enum Mode { SUCCESS, FAILURE, HOLD, DELAY, SYNC_THROW } } private static class RecordingBatchCall implements IBatchCall { @@ -362,6 +456,10 @@ public CompletableFuture execute() { } case DELAY -> CompletableFuture.runAsync(this::completeTasksSuccess, CompletableFuture.delayedExecutor(owner.delay.toMillis(), TimeUnit.MILLISECONDS)); + case SYNC_THROW -> { + // throw directly to simulate synchronous failure + throw owner.failure.get(); + } }; } @@ -370,5 +468,11 @@ void completeTasksSuccess() { t.resultPromise().complete(t.call()); } } + + @Override + public void destroy() { + // record batch destroy + owner.destroyCount.incrementAndGet(); + } } } diff --git a/base-util/pom.xml b/base-util/pom.xml index 701acd89b..8eaeb9142 100644 --- a/base-util/pom.xml +++ b/base-util/pom.xml @@ -56,5 +56,20 @@ org.testng testng + + org.apache.logging.log4j + log4j-api + test + + + org.apache.logging.log4j + log4j-core + test + + + org.apache.logging.log4j + log4j-slf4j2-impl + test + \ No newline at end of file diff --git a/base-util/src/main/java/org/apache/bifromq/base/util/AsyncRetry.java b/base-util/src/main/java/org/apache/bifromq/base/util/AsyncRetry.java index 5dc033097..323ffe56e 100644 --- a/base-util/src/main/java/org/apache/bifromq/base/util/AsyncRetry.java +++ b/base-util/src/main/java/org/apache/bifromq/base/util/AsyncRetry.java @@ -21,16 +21,22 @@ import static org.apache.bifromq.base.util.CompletableFutureUtil.unwrap; -import org.apache.bifromq.base.util.exception.NeedRetryException; -import 
org.apache.bifromq.base.util.exception.RetryTimeoutException; +import java.util.Objects; +import java.util.concurrent.CancellationException; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.BiPredicate; import java.util.function.Supplier; +import org.apache.bifromq.base.util.exception.NeedRetryException; +import org.apache.bifromq.base.util.exception.RetryTimeoutException; /** - * An asynchronous retry utility with exponential backoff. + * An asynchronous retry utility with exponential backoff, supporting cooperative cancellation: + * cancel() on the returned future cancels the in-flight task and stops further retries. */ public class AsyncRetry { @@ -47,7 +53,9 @@ public class AsyncRetry { public static CompletableFuture exec(Supplier> taskSupplier, long retryTimeoutNanos) { return exec(taskSupplier, (result, t) -> { if (t != null) { - return t instanceof NeedRetryException || t.getCause() instanceof NeedRetryException; + Throwable cause = t instanceof CompletionException ? 
t.getCause() : t; + return (cause instanceof NeedRetryException) + || (cause != null && cause.getCause() instanceof NeedRetryException); } return false; }, retryTimeoutNanos / 5, retryTimeoutNanos); @@ -73,58 +81,126 @@ public static CompletableFuture exec(Supplier> taskS BiPredicate retryPredicate, long initialBackoffNanos, long maxDelayNanos) { - assert initialBackoffNanos <= maxDelayNanos; - CompletableFuture onDone = new CompletableFuture<>(); - exec(taskSupplier, retryPredicate, initialBackoffNanos, maxDelayNanos, 0, 0, onDone); - return onDone; + if (initialBackoffNanos < 0 || maxDelayNanos < 0 || initialBackoffNanos > maxDelayNanos) { + throw new IllegalArgumentException("Invalid backoff/timeout settings"); + } + + AtomicReference> current = new AtomicReference<>(); + AtomicBoolean cancelled = new AtomicBoolean(false); + CancellableRetryFuture onDone = new CancellableRetryFuture<>(current, cancelled); + + // kick off + execLoop(taskSupplier, retryPredicate, initialBackoffNanos, maxDelayNanos, 0, 0L, current, cancelled, onDone); + + return CascadeCancelCompletableFuture.fromRoot(onDone); } - private static void exec(Supplier> taskSupplier, - BiPredicate retryPredicate, - long initialBackoffNanos, - long maxDelayNanos, - int retryCount, - long delayNanosSoFar, - CompletableFuture onDone) { + private static void execLoop(Supplier> taskSupplier, + BiPredicate retryPredicate, + long initialBackoffNanos, + long maxDelayNanos, + int retryCount, + long delayNanosSoFar, + AtomicReference> current, + AtomicBoolean cancelled, + CompletableFuture onDone) { + // If already cancelled, stop immediately. + if (cancelled.get()) { + onDone.completeExceptionally(new CancellationException()); + return; + } if (initialBackoffNanos > 0 && delayNanosSoFar >= maxDelayNanos) { onDone.completeExceptionally(new RetryTimeoutException("Max retry delay exceeded")); return; } - // Execute the asynchronous task. 
- executeTask(taskSupplier).whenComplete(unwrap((result, t) -> { - // If the result satisfies the retry predicate, return it. - if (initialBackoffNanos == 0 || !retryPredicate.test(result, t)) { + // Execute one attempt + CompletableFuture attempt = executeTask(taskSupplier); + current.set(attempt); + + attempt.whenComplete(unwrap((result, t) -> { + // If caller cancelled during the task, respect it and stop. + if (cancelled.get()) { + // make sure attempt is cancelled + attempt.cancel(true); + return; + } + + boolean shouldRetry = false; + if (initialBackoffNanos > 0) { + // decide retry only when backoff enabled + try { + shouldRetry = retryPredicate.test(result, t); + } catch (Throwable predicateError) { + // defensive: if predicate misbehaves, treat as terminal failure + onDone.completeExceptionally(predicateError); + return; + } + } + + if (!shouldRetry) { + // terminal path if (t != null) { onDone.completeExceptionally(t); } else { onDone.complete(result); } - } else { - long delay = initialBackoffNanos * (1L << retryCount); - if (delayNanosSoFar + delay > maxDelayNanos) { - delay = maxDelayNanos - delayNanosSoFar; - } - long delayMillisSoFarNew = delayNanosSoFar + delay; - // Otherwise, schedule a retry after the calculated delay. - Executor delayExecutor = CompletableFuture.delayedExecutor(delay, TimeUnit.NANOSECONDS); - CompletableFuture.runAsync(() -> exec( - taskSupplier, - retryPredicate, - initialBackoffNanos, - maxDelayNanos, - retryCount + 1, - delayMillisSoFarNew, onDone), delayExecutor); + return; + } + + // compute next delay (exponential, capped by remaining budget) + long delay = initialBackoffNanos * (1L << Math.min(retryCount, 30)); // guard overflow + long remaining = maxDelayNanos - delayNanosSoFar; + if (delay > remaining) { + delay = remaining; } + + long nextDelaySoFar = delayNanosSoFar + delay; + + // schedule next attempt after delay; if cancelled in the meantime, the runnable will no-op. 
+ Executor delayExecutor = CompletableFuture.delayedExecutor(delay, TimeUnit.NANOSECONDS); + CompletableFuture.runAsync(() -> execLoop( + taskSupplier, retryPredicate, + initialBackoffNanos, maxDelayNanos, + retryCount + 1, nextDelaySoFar, + current, cancelled, onDone + ), delayExecutor); })); } private static CompletableFuture executeTask(Supplier> taskSupplier) { try { - return taskSupplier.get(); + return Objects.requireNonNull(taskSupplier.get(), + "taskSupplier returned null CompletableFuture"); } catch (Throwable e) { return CompletableFuture.failedFuture(e); } } + + /** + * A CompletableFuture that can cancel the in-flight task of the retry loop. + */ + private static final class CancellableRetryFuture extends CompletableFuture { + private final AtomicReference> currentTask; + private final AtomicBoolean cancelled; + + CancellableRetryFuture(AtomicReference> currentTask, AtomicBoolean cancelled) { + this.currentTask = currentTask; + this.cancelled = cancelled; + } + + @Override + public boolean cancel(boolean mayInterruptIfRunning) { + // idempotent: set cancelled flag first + cancelled.set(true); + // best effort: cancel the current in-flight task if any + CompletableFuture inFlight = currentTask.get(); + if (inFlight != null) { + inFlight.cancel(mayInterruptIfRunning); + } + // complete this future as cancelled + return super.cancel(mayInterruptIfRunning); + } + } } \ No newline at end of file diff --git a/base-util/src/main/java/org/apache/bifromq/base/util/CascadeCancelCompletableFuture.java b/base-util/src/main/java/org/apache/bifromq/base/util/CascadeCancelCompletableFuture.java new file mode 100644 index 000000000..4b24ddcb8 --- /dev/null +++ b/base-util/src/main/java/org/apache/bifromq/base/util/CascadeCancelCompletableFuture.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.base.util; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; +import java.util.concurrent.Executor; +import java.util.function.Function; + +/** + * A CompletableFuture that supports "cascade cancel" to the first not-yet-completed upstream cancellable stage. + * + *

Semantics on cancel(tail): + *

    + *
  • If root is not completed, cancel root;
  • + *
  • Otherwise, scan forward the recorded upstream cancellable stages in creation order and + * cancel the first stage that is not yet completed (e.g. A, then B, then D...);
  • + *
  • If all recorded stages are completed, just cancel the current stage.
  • + *
+ * + *

Only stages that return a {@link CompletableFuture} (via thenCompose/exceptionallyCompose and their async variants) + * are considered "cancellable" and recorded for cascade cancellation. Pure mapping stages (thenApply, thenAccept, etc.) + * are not recorded and thus won't be directly cancelled. + */ +public final class CascadeCancelCompletableFuture extends CompletableFuture { + private final Shared shared; + + private CascadeCancelCompletableFuture(Shared shared) { + this.shared = shared; + } + + private CascadeCancelCompletableFuture(CompletableFuture root) { + this.shared = new Shared(root); + } + + /** + * Start a cascade-cancellable chain from a root future. + */ + public static CompletableFuture fromRoot(CompletableFuture root) { + CascadeCancelCompletableFuture head = new CascadeCancelCompletableFuture<>(root); + root.whenComplete((v, ex) -> { + if (ex == null) { + head.complete(v); + } else { + head.completeExceptionally(ex); + } + }); + return head; + } + + @Override + public CompletableFuture newIncompleteFuture() { + return new CascadeCancelCompletableFuture<>(shared); + } + + @Override + public boolean cancel(boolean mayInterruptIfRunning) { + cancelUpstream(mayInterruptIfRunning); + return super.cancel(mayInterruptIfRunning); + } + + private void cancelUpstream(boolean mayInterruptIfRunning) { + // Scan from root to find the first not-yet-completed cancellable + for (Node n = shared.head; n != null; n = n.next) { + CompletableFuture f = n.future; + if (!f.isDone()) { + f.cancel(mayInterruptIfRunning); + break; + } + } + } + + private void addCancelable(CompletableFuture cf) { + shared.append(cf); + } + + private Function> wrapCompose(Function> fn) { + return (T t) -> { + CompletionStage stage = fn.apply(t); + if (stage instanceof CompletableFuture) { + addCancelable((CompletableFuture) stage); + } + return stage; + }; + } + + private Function> wrapExCompose( + Function> fn) { + return (Throwable ex) -> { + CompletionStage stage = fn.apply(ex); + if 
(stage instanceof CompletableFuture) { + addCancelable((CompletableFuture) stage); + } + return stage; + }; + } + + // thenCompose overrides + @Override + public CompletableFuture thenCompose(Function> fn) { + return super.thenCompose(wrapCompose(fn)); + } + + @Override + public CompletableFuture thenComposeAsync(Function> fn) { + return super.thenComposeAsync(wrapCompose(fn)); + } + + @Override + public CompletableFuture thenComposeAsync(Function> fn, + Executor executor) { + return super.thenComposeAsync(wrapCompose(fn), executor); + } + + // exceptionallyCompose + @Override + public CompletableFuture exceptionallyCompose(Function> fn) { + return super.exceptionallyCompose(wrapExCompose(fn)); + } + + @Override + public CompletableFuture exceptionallyComposeAsync(Function> fn) { + return super.exceptionallyComposeAsync(wrapExCompose(fn)); + } + + @Override + public CompletableFuture exceptionallyComposeAsync(Function> fn, + Executor executor) { + return super.exceptionallyComposeAsync(wrapExCompose(fn), executor); + } + + private static final class Node { + final CompletableFuture future; + volatile Node next; + + Node(CompletableFuture future) { + this.future = future; + } + } + + private static final class Shared { + private static final VarHandle TAIL; + + static { + try { + TAIL = MethodHandles.lookup().findVarHandle(Shared.class, "tail", Node.class); + } catch (ReflectiveOperationException e) { + throw new ExceptionInInitializerError(e); + } + } + + final Node head; + volatile Node tail; + + Shared(CompletableFuture root) { + Node n = new Node(root); + this.head = n; + this.tail = n; + } + + void append(CompletableFuture cf) { + Node n = new Node(cf); + Node prev = (Node) TAIL.getAndSet(this, n); + prev.next = n; + } + } +} diff --git a/base-util/src/test/java/org/apache/bifromq/base/util/AsyncRetryTest.java b/base-util/src/test/java/org/apache/bifromq/base/util/AsyncRetryTest.java index 4c3514b67..c78b2228f 100644 --- 
a/base-util/src/test/java/org/apache/bifromq/base/util/AsyncRetryTest.java +++ b/base-util/src/test/java/org/apache/bifromq/base/util/AsyncRetryTest.java @@ -19,16 +19,20 @@ package org.apache.bifromq.base.util; +import static org.awaitility.Awaitility.await; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertThrows; import static org.testng.Assert.fail; import static org.testng.AssertJUnit.assertTrue; -import org.apache.bifromq.base.util.exception.RetryTimeoutException; +import java.util.concurrent.CancellationException; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; +import org.apache.bifromq.base.util.exception.RetryTimeoutException; import org.testng.annotations.Test; public class AsyncRetryTest { @@ -86,4 +90,36 @@ public void testTimeoutExceeded() throws Exception { } assertTrue(counter.get() > 0); } -} \ No newline at end of file + + @Test + public void testCancelCancelsInFlightTask() { + AtomicInteger attempts = new AtomicInteger(0); + AtomicReference> inFlightRef = new AtomicReference<>(); + + Supplier> taskSupplier = () -> { + attempts.incrementAndGet(); + CompletableFuture f = new CompletableFuture<>(); + inFlightRef.set(f); + return f; // never completes unless cancelled by AsyncRetry + }; + + CompletableFuture resultFuture = AsyncRetry.exec(taskSupplier, (result, e) -> true, + TimeUnit.MILLISECONDS.toNanos(10), TimeUnit.SECONDS.toNanos(1) + ); + + await().atMost(1, TimeUnit.SECONDS).until(() -> inFlightRef.get() != null); + + resultFuture.cancel(true); + + await().atMost(1, TimeUnit.SECONDS).until(() -> { + CompletableFuture f = inFlightRef.get(); + return f != null && f.isCancelled(); + }); + + await().atMost(1, TimeUnit.SECONDS).until(resultFuture::isCancelled); + + 
assertThrows(CancellationException.class, resultFuture::join); + + assertEquals(attempts.get(), 1); + } +} diff --git a/base-util/src/test/java/org/apache/bifromq/base/util/AsyncRunnerTest.java b/base-util/src/test/java/org/apache/bifromq/base/util/AsyncRunnerTest.java index 526edd107..0e8d33927 100644 --- a/base-util/src/test/java/org/apache/bifromq/base/util/AsyncRunnerTest.java +++ b/base-util/src/test/java/org/apache/bifromq/base/util/AsyncRunnerTest.java @@ -60,6 +60,8 @@ public final void tearDown(Method method) { log.info("Test case[{}.{}] finished, doing teardown", method.getDeclaringClass().getName(), method.getName()); try { + executor.shutdownNow(); + executor.awaitTermination(5, TimeUnit.SECONDS); log.info("Test case[{}.{}] teared down", method.getDeclaringClass().getName(), method.getName()); } catch (Throwable e) { @@ -92,18 +94,22 @@ public void testAwaitTask() { AtomicInteger counter = new AtomicInteger(); for (int i = 0; i < 10; i++) { queue.add(() -> new CompletableFuture() - .orTimeout(10, TimeUnit.MILLISECONDS) + // Use a slightly larger timeout to reduce CI flakiness + .orTimeout(100, TimeUnit.MILLISECONDS) .whenComplete((v, e) -> counter.incrementAndGet())); } - queue.awaitDone().toCompletableFuture().join(); + // Add a guard timeout to avoid indefinite blocking in CI + queue.awaitDone().toCompletableFuture().orTimeout(5, TimeUnit.SECONDS).join(); assertEquals(counter.get(), 10); for (int i = 0; i < 10; i++) { queue.add(() -> new CompletableFuture() - .orTimeout(10, TimeUnit.MILLISECONDS) + // Use a slightly larger timeout to reduce CI flakiness + .orTimeout(100, TimeUnit.MILLISECONDS) .whenComplete((v, e) -> counter.incrementAndGet())); } - queue.awaitDone().toCompletableFuture().join(); + // Add a guard timeout to avoid indefinite blocking in CI + queue.awaitDone().toCompletableFuture().orTimeout(5, TimeUnit.SECONDS).join(); assertEquals(counter.get(), 20); } @@ -115,8 +121,8 @@ public void testCancelRunningAsyncTask() { CountDownLatch 
latch1 = new CountDownLatch(1); CountDownLatch latch2 = new CountDownLatch(1); - CompletableFuture f1 = queue.add(() -> { - CompletableFuture f = new CompletableFuture<>(); + CompletableFuture f1 = queue.add(() -> { + CompletableFuture f = new CompletableFuture<>(); f.whenComplete((v, e) -> { if (f.isCancelled()) { canceled.set(true); @@ -150,9 +156,9 @@ public void testCancelNotRunning() { @Test public void testCancelAll() { AsyncRunner queue = new AsyncRunner(executor); - CompletableFuture f1 = queue.add(() -> new CompletableFuture()); - CompletableFuture f2 = queue.add(() -> new CompletableFuture()); - CompletableFuture f3 = queue.add(() -> new CompletableFuture()); + CompletableFuture f1 = queue.add(() -> new CompletableFuture<>()); + CompletableFuture f2 = queue.add(() -> new CompletableFuture<>()); + CompletableFuture f3 = queue.add(() -> new CompletableFuture<>()); queue.cancelAll(); queue.awaitDone().toCompletableFuture().join(); assertTrue(f1.isCancelled()); @@ -207,10 +213,7 @@ public void testTaskSupplierException() { RuntimeException exp = new RuntimeException(); try { queue.add(() -> { - if (true) { - throw exp; - } - return new CompletableFuture<>(); + throw exp; }).join(); fail(); } catch (Throwable e) { diff --git a/base-util/src/test/java/org/apache/bifromq/base/util/CascadeCancelCompletableFutureTest.java b/base-util/src/test/java/org/apache/bifromq/base/util/CascadeCancelCompletableFutureTest.java new file mode 100644 index 000000000..8b59e8a97 --- /dev/null +++ b/base-util/src/test/java/org/apache/bifromq/base/util/CascadeCancelCompletableFutureTest.java @@ -0,0 +1,343 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.base.util; + +import static java.util.concurrent.TimeUnit.SECONDS; +import static org.awaitility.Awaitility.await; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertSame; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import java.util.List; +import java.util.concurrent.CancellationException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicInteger; +import org.testng.annotations.AfterClass; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +public class CascadeCancelCompletableFutureTest { + private ExecutorService single; + + private static void assertCancelledSemantics(CompletableFuture f) { + await().atMost(1, SECONDS).until(() -> f.isCancelled() || f.isCompletedExceptionally()); + if (!f.isCancelled()) { + assertTrue(f.isCompletedExceptionally()); + try { + f.join(); + fail(); + } catch (CompletionException e) { + assertTrue(e.getCause() instanceof CancellationException); + } + } + } + + private static void 
assertDoneOrCancelled(CompletableFuture f) { + await().atMost(1, SECONDS).until(f::isDone); + if (!f.isCancelled() && !f.isCompletedExceptionally()) { + // done normally is acceptable for this scenario + return; + } + // otherwise, must satisfy cancelled semantics + assertCancelledSemantics(f); + } + + @BeforeClass + public void setup() { + single = Executors.newSingleThreadExecutor(); + } + + @AfterClass + public void teardown() { + if (single != null) { + single.shutdownNow(); + } + } + + @Test + public void fromRootPropagatesCompletion() throws Exception { + CompletableFuture root = new CompletableFuture<>(); + CompletableFuture head = CascadeCancelCompletableFuture.fromRoot(root); + + root.complete("ok"); + assertEquals(head.join(), "ok"); + + CompletableFuture root2 = new CompletableFuture<>(); + CompletableFuture head2 = CascadeCancelCompletableFuture.fromRoot(root2); + RuntimeException ex = new RuntimeException("Mocked"); + root2.completeExceptionally(ex); + assertTrue(head2.isCompletedExceptionally()); + try { + head2.get(); + fail(); + } catch (ExecutionException e) { + assertSame(e.getCause(), ex); + } + } + + @Test + public void typePreservedAcrossStages() { + CompletableFuture root = new CompletableFuture<>(); + CompletableFuture head = CascadeCancelCompletableFuture.fromRoot(root); + + CompletableFuture s1 = head.thenCompose(v -> CompletableFuture.completedFuture(1)); + CompletableFuture s2 = head.thenComposeAsync(v -> CompletableFuture.completedFuture(2)); + CompletableFuture s3 = head.thenApply(v -> v); + CompletableFuture s4 = head.exceptionallyCompose(ex -> CompletableFuture.completedFuture("x")); + CompletableFuture s5 = head.exceptionallyComposeAsync(ex -> CompletableFuture.completedFuture("y")); + + assertTrue(s1 instanceof CascadeCancelCompletableFuture); + assertTrue(s2 instanceof CascadeCancelCompletableFuture); + assertTrue(s3 instanceof CascadeCancelCompletableFuture); + assertTrue(s4 instanceof CascadeCancelCompletableFuture); + 
assertTrue(s5 instanceof CascadeCancelCompletableFuture); + } + + @Test + public void cancelHitsRootWhenRootPending() { + RecordingFuture root = new RecordingFuture<>(); + CompletableFuture head = CascadeCancelCompletableFuture.fromRoot(root); + + RecordingFuture a = new RecordingFuture<>(); + CompletableFuture aStage = head.thenCompose(v -> a); + RecordingFuture b = new RecordingFuture<>(); + CompletableFuture bStage = aStage.thenCompose(v -> b); + CompletableFuture cStage = bStage.thenApply(v -> v); + RecordingFuture d = new RecordingFuture<>(); + CompletableFuture dStage = cStage.exceptionallyCompose(ex -> d); + CompletableFuture e = dStage.thenApply(v -> v); + + e.cancel(true); + // root should be cancelled; others untouched + assertEquals(root.cancelCount.get(), 1); + assertEquals(root.cancelFlags.size(), 1); + assertTrue(root.cancelFlags.get(0)); + assertEquals(a.cancelCount.get(), 0); + assertEquals(b.cancelCount.get(), 0); + assertEquals(d.cancelCount.get(), 0); + assertCancelledSemantics(e); + } + + @Test + public void cancelHitsAWhenAExecuting() { + RecordingFuture root = new RecordingFuture<>(); + CompletableFuture head = CascadeCancelCompletableFuture.fromRoot(root); + + RecordingFuture a = new RecordingFuture<>(); + CompletableFuture aStage = head.thenCompose(v -> a); + RecordingFuture b = new RecordingFuture<>(); + CompletableFuture bStage = aStage.thenCompose(v -> b); + CompletableFuture cStage = bStage.thenApply(v -> v); + RecordingFuture d = new RecordingFuture<>(); + CompletableFuture dStage = cStage.exceptionallyCompose(ex -> d); + CompletableFuture e = dStage.thenApply(v -> v); + + root.complete("rootDone"); + // ensure A is created and pending + await().atMost(1, SECONDS).until(() -> !a.isDone()); + + e.cancel(false); + assertEquals(a.cancelCount.get(), 1); + assertEquals(a.cancelFlags.size(), 1); + assertFalse(a.cancelFlags.get(0)); + assertEquals(root.cancelCount.get(), 0); // root already completed, should not be cancelled now + 
assertEquals(b.cancelCount.get(), 0); + assertEquals(d.cancelCount.get(), 0); + assertCancelledSemantics(e); + } + + @Test + public void cancelHitsB_whenBExecuting() { + RecordingFuture root = new RecordingFuture<>(); + CompletableFuture head = CascadeCancelCompletableFuture.fromRoot(root); + + RecordingFuture a = new RecordingFuture<>(); + CompletableFuture aStage = head.thenCompose(v -> a); + RecordingFuture b = new RecordingFuture<>(); + CompletableFuture bStage = aStage.thenCompose(v -> b); + CompletableFuture cStage = bStage.thenApply(v -> v); + RecordingFuture d = new RecordingFuture<>(); + CompletableFuture dStage = cStage.exceptionallyCompose(ex -> d); + CompletableFuture e = dStage.thenApply(v -> v); + + root.complete("rootDone"); + await().atMost(1, SECONDS).until(() -> !a.isDone()); + a.complete("aDone"); // advance to B + await().atMost(1, SECONDS).until(() -> !b.isDone()); + + e.cancel(true); + assertEquals(b.cancelCount.get(), 1); + assertEquals(b.cancelFlags.size(), 1); + assertTrue(b.cancelFlags.get(0)); + assertEquals(root.cancelCount.get(), 0); + assertEquals(a.cancelCount.get(), 0); + assertEquals(d.cancelCount.get(), 0); + assertCancelledSemantics(e); + } + + @Test + public void cancelHitsDWhenDExecutingExceptionPath() { + RecordingFuture root = new RecordingFuture<>(); + CompletableFuture head = CascadeCancelCompletableFuture.fromRoot(root); + + RecordingFuture a = new RecordingFuture<>(); + CompletableFuture aStage = head.thenCompose(v -> a); + RecordingFuture b = new RecordingFuture<>(); + CompletableFuture bStage = aStage.thenCompose(v -> b); + // C throws, to enter exceptionallyCompose(D) + CompletableFuture cStage = bStage.thenApply(v -> { + throw new RuntimeException("C failed"); + }); + RecordingFuture d = new RecordingFuture<>(); + CompletableFuture dStage = cStage.exceptionallyCompose(ex -> d); + CompletableFuture e = dStage.thenApply(v -> v); + + root.complete("rootDone"); + a.complete("aDone"); + b.complete("bDone"); + // ensure D 
exists and pending + await().atMost(1, SECONDS).until(() -> !d.isDone()); + + e.cancel(false); + assertEquals(d.cancelCount.get(), 1); + assertEquals(d.cancelFlags.size(), 1); + assertFalse(d.cancelFlags.get(0)); + assertEquals(root.cancelCount.get(), 0); + assertEquals(a.cancelCount.get(), 0); + assertEquals(b.cancelCount.get(), 0); + assertCancelledSemantics(e); + } + + @Test + public void cancelOnlyTailWhenAllUpstreamDone() { + RecordingFuture root = new RecordingFuture<>(); + CompletableFuture head = CascadeCancelCompletableFuture.fromRoot(root); + + RecordingFuture a = new RecordingFuture<>(); + CompletableFuture aStage = head.thenCompose(v -> a); + RecordingFuture b = new RecordingFuture<>(); + CompletableFuture bStage = aStage.thenCompose(v -> b); + CompletableFuture cStage = bStage.thenApply(v -> v); + RecordingFuture d = new RecordingFuture<>(); + CompletableFuture dStage = cStage.exceptionallyCompose(ex -> d); + CompletableFuture e = dStage.thenApply(v -> v); + + root.complete("rootDone"); + a.complete("aDone"); + b.complete("bDone"); + + // cancel tail + e.cancel(true); + assertEquals(root.cancelCount.get(), 0); + assertEquals(a.cancelCount.get(), 0); + assertEquals(b.cancelCount.get(), 0); + assertEquals(d.cancelCount.get(), 0); + assertDoneOrCancelled(e); + } + + @Test + public void asyncComposeCancelHitsFirstPending() { + RecordingFuture root = new RecordingFuture<>(); + CompletableFuture head = CascadeCancelCompletableFuture.fromRoot(root); + + RecordingFuture a = new RecordingFuture<>(); + CompletableFuture aStage = head.thenComposeAsync(v -> a, single); + RecordingFuture b = new RecordingFuture<>(); + CompletableFuture bStage = aStage.thenComposeAsync(v -> b, single); + CompletableFuture cStage = bStage.thenApplyAsync(v -> v, single); + RecordingFuture d = new RecordingFuture<>(); + CompletableFuture dStage = cStage.exceptionallyComposeAsync(ex -> d, single); + CompletableFuture e = dStage.thenApplyAsync(v -> v, single); + + 
root.complete("rootDone"); + await().atMost(1, SECONDS).until(() -> !a.isDone()); + + e.cancel(false); + assertEquals(a.cancelCount.get(), 1); + assertFalse(a.cancelFlags.get(0)); + assertCancelledSemantics(e); + } + + @Test + public void immediateCompletedComposeSkipsToNextPending() { + RecordingFuture root = new RecordingFuture<>(); + CompletableFuture head = CascadeCancelCompletableFuture.fromRoot(root); + + CompletableFuture aCompleted = CompletableFuture.completedFuture("aDone"); + CompletableFuture aStage = head.thenCompose(v -> aCompleted); + RecordingFuture b = new RecordingFuture<>(); + CompletableFuture bStage = aStage.thenCompose(v -> b); + CompletableFuture cStage = bStage.thenApply(v -> v); + CompletableFuture dStage = cStage.exceptionallyCompose(ex -> CompletableFuture.completedFuture("dv")); + CompletableFuture e = dStage.thenApply(v -> v); + + root.complete("rootDone"); + await().atMost(1, SECONDS).until(() -> !b.isDone()); + + e.cancel(true); + assertEquals(b.cancelCount.get(), 1); + assertTrue(b.cancelFlags.get(0)); + // exceptionallyCompose recovers to a normal value, + assertDoneOrCancelled(e); + } + + @Test + public void idempotentCancelNoThrow() { + RecordingFuture root = new RecordingFuture<>(); + CompletableFuture head = CascadeCancelCompletableFuture.fromRoot(root); + + RecordingFuture a = new RecordingFuture<>(); + CompletableFuture aStage = head.thenCompose(v -> a); + RecordingFuture b = new RecordingFuture<>(); + CompletableFuture bStage = aStage.thenCompose(v -> b); + CompletableFuture cStage = bStage.thenApply(v -> v); + RecordingFuture d = new RecordingFuture<>(); + CompletableFuture dStage = cStage.exceptionallyCompose(ex -> d); + CompletableFuture e = dStage.thenApply(v -> v); + + root.complete("rootDone"); + await().atMost(1, SECONDS).until(() -> !a.isDone()); + + e.cancel(false); + e.cancel(false); + // A should be cancelled at least once; no exception thrown + assertTrue(a.cancelCount.get() >= 1); + assertCancelledSemantics(e); + 
} + + private static class RecordingFuture extends CompletableFuture { + final List cancelFlags = new CopyOnWriteArrayList<>(); + final AtomicInteger cancelCount = new AtomicInteger(); + + @Override + public boolean cancel(boolean mayInterruptIfRunning) { + cancelFlags.add(mayInterruptIfRunning); + cancelCount.incrementAndGet(); + return super.cancel(mayInterruptIfRunning); + } + } +} diff --git a/bifromq-bom/pom.xml b/bifromq-bom/pom.xml index 3b2b7dbc9..398052b5e 100644 --- a/bifromq-bom/pom.xml +++ b/bifromq-bom/pom.xml @@ -118,7 +118,17 @@ org.apache.bifromq - base-kv-local-engine + base-kv-local-engine-spi + ${project.version} + + + org.apache.bifromq + base-kv-local-engine-rocksdb + ${project.version} + + + org.apache.bifromq + base-kv-local-engine-memory ${project.version} @@ -136,6 +146,16 @@ base-kv-store-client ${project.version} + + org.apache.bifromq + base-kv-store-coproc-api + ${project.version} + + + org.apache.bifromq + base-kv-split-hinter-spi + ${project.version} + org.apache.bifromq base-kv-store-server diff --git a/bifromq-deliverer/src/main/java/org/apache/bifromq/deliverer/BatchDeliveryCall.java b/bifromq-deliverer/src/main/java/org/apache/bifromq/deliverer/BatchDeliveryCall.java index 905608154..d12e7d067 100644 --- a/bifromq-deliverer/src/main/java/org/apache/bifromq/deliverer/BatchDeliveryCall.java +++ b/bifromq-deliverer/src/main/java/org/apache/bifromq/deliverer/BatchDeliveryCall.java @@ -39,6 +39,7 @@ import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basescheduler.IBatchCall; import org.apache.bifromq.basescheduler.ICallTask; +import org.apache.bifromq.basescheduler.exception.BackPressureException; import org.apache.bifromq.dist.client.IDistClient; import org.apache.bifromq.plugin.subbroker.DeliveryPack; import org.apache.bifromq.plugin.subbroker.DeliveryPackage; @@ -157,6 +158,7 @@ private CompletableFuture execute(DeliveryRequest request, while ((task = tasks.poll()) != null) { 
task.resultPromise().complete(BACK_PRESSURE_REJECTED); } + throw new BackPressureException("Batch delivery call back-pressured"); } default -> { assert reply.getCode() == DeliveryReply.Code.ERROR; diff --git a/bifromq-dist/bifromq-dist-client/src/main/java/org/apache/bifromq/dist/client/scheduler/BatchPubCall.java b/bifromq-dist/bifromq-dist-client/src/main/java/org/apache/bifromq/dist/client/scheduler/BatchPubCall.java index 516ca6ecc..9361f74a3 100644 --- a/bifromq-dist/bifromq-dist-client/src/main/java/org/apache/bifromq/dist/client/scheduler/BatchPubCall.java +++ b/bifromq-dist/bifromq-dist-client/src/main/java/org/apache/bifromq/dist/client/scheduler/BatchPubCall.java @@ -30,6 +30,7 @@ import org.apache.bifromq.baserpc.client.IRPCClient; import org.apache.bifromq.basescheduler.IBatchCall; import org.apache.bifromq.basescheduler.ICallTask; +import org.apache.bifromq.basescheduler.exception.BackPressureException; import org.apache.bifromq.dist.client.PubResult; import org.apache.bifromq.dist.rpc.proto.DistReply; import org.apache.bifromq.dist.rpc.proto.DistRequest; @@ -134,6 +135,7 @@ private CompletableFuture execute(Queue { assert reply.getCode() == DistReply.Code.ERROR; diff --git a/bifromq-dist/bifromq-dist-server/src/main/java/org/apache/bifromq/dist/server/DistResponsePipeline.java b/bifromq-dist/bifromq-dist-server/src/main/java/org/apache/bifromq/dist/server/DistResponsePipeline.java index 942d5ef8a..4e022d6c7 100644 --- a/bifromq-dist/bifromq-dist-server/src/main/java/org/apache/bifromq/dist/server/DistResponsePipeline.java +++ b/bifromq-dist/bifromq-dist-server/src/main/java/org/apache/bifromq/dist/server/DistResponsePipeline.java @@ -27,6 +27,8 @@ import io.grpc.stub.StreamObserver; import java.time.Duration; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import lombok.extern.slf4j.Slf4j; import 
org.apache.bifromq.baseenv.MemUsage; @@ -53,6 +55,8 @@ class DistResponsePipeline extends ResponsePipeline { private final int callQueueIdx = DistQueueAllocator.allocate(); private final IEventCollector eventCollector; private final IDistWorkerCallScheduler distCallScheduler; + private final ConcurrentLinkedQueue tasks = new ConcurrentLinkedQueue<>(); + private final AtomicBoolean draining = new AtomicBoolean(false); DistResponsePipeline(IDistWorkerCallScheduler distCallScheduler, StreamObserver responseObserver, @@ -65,7 +69,8 @@ class DistResponsePipeline extends ResponsePipeline { @Override protected CompletableFuture handleRequest(String tenantId, DistRequest request) { - return distCallScheduler.schedule(new TenantPubRequest(tenantId, request.getMessagesList(), callQueueIdx)) + CompletableFuture work = distCallScheduler + .schedule(new TenantPubRequest(tenantId, request.getMessagesList(), callQueueIdx)) .handle(unwrap((fanoutByTopic, e) -> { DistReply.Builder replyBuilder = DistReply.newBuilder().setReqId(request.getReqId()); if (e != null) { @@ -109,6 +114,47 @@ protected CompletableFuture handleRequest(String tenantId, DistReques } return replyBuilder.build(); })); + ReplyTask task = new ReplyTask(request, work); + tasks.add(task); + work.whenComplete((v, e) -> drain()); + return task.onDone; + } + + private void drain() { + while (true) { + if (!draining.compareAndSet(false, true)) { + return; + } + try { + while (true) { + ReplyTask head = tasks.peek(); + if (head == null) { + break; + } + if (!head.work.isDone()) { + break; + } + tasks.poll(); + bridge(head.work, head.onDone); + } + } finally { + draining.set(false); + } + ReplyTask head = tasks.peek(); + if (head == null || !head.work.isDone()) { + return; + } + } + } + + private void bridge(CompletableFuture from, CompletableFuture to) { + from.whenComplete((v, e) -> { + if (e != null) { + to.completeExceptionally(e); + } else { + to.complete(v); + } + }); } private static class DistQueueAllocator { @@ 
-119,4 +165,16 @@ public static int allocate() { return IDX.getAndIncrement() % QUEUE_NUMS; } } + + private static class ReplyTask { + final DistRequest request; + final CompletableFuture work; // real work + final CompletableFuture onDone; // ordered future returned to pipeline + + ReplyTask(DistRequest request, CompletableFuture work) { + this.request = request; + this.work = work; + this.onDone = new CompletableFuture<>(); + } + } } diff --git a/bifromq-dist/bifromq-dist-server/src/main/java/org/apache/bifromq/dist/server/scheduler/TenantRangeLookupCache.java b/bifromq-dist/bifromq-dist-server/src/main/java/org/apache/bifromq/dist/server/scheduler/TenantRangeLookupCache.java index f8679c1ee..dc52b9d88 100644 --- a/bifromq-dist/bifromq-dist-server/src/main/java/org/apache/bifromq/dist/server/scheduler/TenantRangeLookupCache.java +++ b/bifromq-dist/bifromq-dist-server/src/main/java/org/apache/bifromq/dist/server/scheduler/TenantRangeLookupCache.java @@ -30,10 +30,13 @@ import com.github.benmanes.caffeine.cache.LoadingCache; import com.google.protobuf.ByteString; import java.time.Duration; +import java.util.AbstractCollection; import java.util.Collection; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.NavigableMap; +import java.util.Objects; import java.util.Optional; import org.apache.bifromq.basekv.client.KVRangeSetting; import org.apache.bifromq.basekv.proto.Boundary; @@ -60,7 +63,8 @@ Collection lookup(String topic, NavigableMap allCandidates = findByBoundary(tenantBoundary, effectiveRouter); - return cache.get(new CacheKey(tenantId, topic, List.copyOf(allCandidates))); + // Wrap candidates in an order-sensitive, immutable view without copying elements + return cache.get(new CacheKey(tenantId, topic, new CandidatesView(allCandidates))); } private Collection lookup(CacheKey key) { @@ -101,6 +105,72 @@ private Collection lookup(CacheKey key) { } } - private record CacheKey(String tenantId, String topic, List 
candidates) { + private record CacheKey(String tenantId, String topic, CandidatesView candidates) { + } + + private static final class CandidatesView extends AbstractCollection { + private final Collection delegate; + private final int size; + private final int hash; + + CandidatesView(Collection delegate) { + this.delegate = delegate; + this.size = delegate.size(); + int h = 1; + for (KVRangeSetting e : delegate) { + h = 31 * h + (e == null ? 0 : e.hashCode()); + } + this.hash = h; + } + + @Override + public Iterator iterator() { + // read-only iteration + final Iterator it = delegate.iterator(); + return new Iterator<>() { + @Override + public boolean hasNext() { + return it.hasNext(); + } + + @Override + public KVRangeSetting next() { + return it.next(); + } + }; + } + + @Override + public int size() { + return size; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof Collection other)) { + return false; + } + if (other.size() != size) { + return false; + } + Iterator it1 = this.iterator(); + Iterator it2 = other.iterator(); + while (it1.hasNext() && it2.hasNext()) { + Object o1 = it1.next(); + Object o2 = it2.next(); + if (!(Objects.equals(o1, o2))) { + return false; + } + } + return !(it1.hasNext() || it2.hasNext()); + } + + @Override + public int hashCode() { + return hash; + } } } diff --git a/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistResponsePipelineTest.java b/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistResponsePipelineTest.java new file mode 100644 index 000000000..8a0e025dd --- /dev/null +++ b/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistResponsePipelineTest.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.dist.server; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; + +import io.grpc.Context; +import io.grpc.stub.ServerCallStreamObserver; +import io.micrometer.core.instrument.Timer; +import io.micrometer.core.instrument.simple.SimpleMeterRegistry; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import lombok.SneakyThrows; +import org.apache.bifromq.baserpc.RPCContext; +import org.apache.bifromq.baserpc.metrics.IRPCMeter; +import org.apache.bifromq.baserpc.metrics.RPCMetric; +import org.apache.bifromq.dist.rpc.proto.DistReply; +import org.apache.bifromq.dist.rpc.proto.DistRequest; +import org.apache.bifromq.dist.server.scheduler.IDistWorkerCallScheduler; +import org.apache.bifromq.type.ClientInfo; +import org.apache.bifromq.type.PublisherMessagePack; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class DistResponsePipelineTest { + private AutoCloseable closeable; + + 
@Mock + private IDistWorkerCallScheduler scheduler; + + @Mock + private ServerCallStreamObserver responseObserver; + + @BeforeMethod + public void setup() { + closeable = MockitoAnnotations.openMocks(this); + setupContext(); + } + + private void setupContext() { + Map metadata = new HashMap<>(); + Context.current() + .withValue(RPCContext.METER_KEY_CTX_KEY, createMockMeter()) + .withValue(RPCContext.TENANT_ID_CTX_KEY, "tenantId") + .withValue(RPCContext.CUSTOM_METADATA_CTX_KEY, metadata) + .attach(); + } + + private IRPCMeter.IRPCMethodMeter createMockMeter() { + return new IRPCMeter.IRPCMethodMeter() { + @Override + public void recordCount(RPCMetric metric) { + } + + @Override + public void recordCount(RPCMetric metric, double inc) { + } + + @Override + public Timer timer(RPCMetric metric) { + return Timer.builder("dummy").register(new SimpleMeterRegistry()); + } + + @Override + public void recordSummary(RPCMetric metric, int depth) { + } + }; + } + + @SneakyThrows + @AfterMethod + public void tearDown() { + closeable.close(); + } + + @Test + public void testEmitOrderWithConcurrentBatches() throws Exception { + DistRequest r1 = DistRequest.newBuilder() + .setReqId(1) + .addMessages(publisher("tenantA", topicPack("t/1"), topicPack("t/2"))) + .build(); + DistRequest r2 = DistRequest.newBuilder() + .setReqId(2) + .addMessages(publisher("tenantA", topicPack("t/3"))) + .build(); + + CompletableFuture> f1 = new CompletableFuture<>(); + CompletableFuture> f2 = new CompletableFuture<>(); + when(scheduler.schedule(any())).thenReturn(f1).thenReturn(f2); + + DistResponsePipeline pipeline = new DistResponsePipeline(scheduler, responseObserver, event -> {}); + + CompletableFuture resp1 = pipeline.handleRequest("tenantA", r1); + CompletableFuture resp2 = pipeline.handleRequest("tenantA", r2); + + Map m2 = new HashMap<>(); + m2.put("t/3", 1); + f2.complete(m2); + + assertFalse(resp2.isDone()); + + Map m1 = new HashMap<>(); + m1.put("t/1", 1); + m1.put("t/2", 2); + 
f1.complete(m1); + + DistReply reply1 = resp1.get(3, TimeUnit.SECONDS); + DistReply reply2 = resp2.get(3, TimeUnit.SECONDS); + + assertEquals(reply1.getReqId(), 1L); + assertEquals(reply2.getReqId(), 2L); + assertEquals(reply1.getResultsCount(), r1.getMessagesCount()); + assertEquals(reply2.getResultsCount(), r2.getMessagesCount()); + } + + private PublisherMessagePack.TopicPack topicPack(String topic) { + return PublisherMessagePack.TopicPack.newBuilder().setTopic(topic).build(); + } + + private PublisherMessagePack publisher(String tenantId, PublisherMessagePack.TopicPack... packs) { + PublisherMessagePack.Builder b = PublisherMessagePack.newBuilder() + .setPublisher(ClientInfo.newBuilder().setTenantId(tenantId).build()); + for (PublisherMessagePack.TopicPack p : packs) { + b.addMessagePack(p); + } + return b.build(); + } +} + diff --git a/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java b/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java index b65f248fb..9cc5e858d 100644 --- a/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java +++ b/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java @@ -24,7 +24,9 @@ import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.when; +import com.google.protobuf.Struct; import java.time.Duration; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import lombok.extern.slf4j.Slf4j; @@ -33,7 +35,6 @@ import org.apache.bifromq.basecrdt.service.CRDTServiceOptions; import org.apache.bifromq.basecrdt.service.ICRDTService; import org.apache.bifromq.basekv.client.IBaseKVStoreClient; -import org.apache.bifromq.basekv.localengine.memory.InMemKVEngineConfigurator; import org.apache.bifromq.basekv.metaservice.IBaseKVMetaService; import 
org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; import org.apache.bifromq.basekv.utils.BoundaryUtil; @@ -48,10 +49,13 @@ import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler; import org.apache.bifromq.plugin.settingprovider.ISettingProvider; import org.apache.bifromq.plugin.settingprovider.Setting; +import org.apache.bifromq.plugin.subbroker.CheckReply; +import org.apache.bifromq.plugin.subbroker.CheckRequest; import org.apache.bifromq.plugin.subbroker.IDeliverer; import org.apache.bifromq.plugin.subbroker.ISubBroker; import org.apache.bifromq.plugin.subbroker.ISubBrokerManager; import org.mockito.Mock; +import org.mockito.Mockito; import org.mockito.MockitoAnnotations; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; @@ -90,6 +94,15 @@ public void setup() { closeable = MockitoAnnotations.openMocks(this); when(subBrokerMgr.get(anyInt())).thenReturn(inboxBroker); when(inboxBroker.open(anyString())).thenReturn(inboxDeliverer); + Mockito.lenient().when(inboxBroker.check(Mockito.any())).thenAnswer(invocation -> { + // build a CheckReply with OK codes aligned with request size + CheckRequest req = invocation.getArgument(0); + CheckReply.Builder reply = CheckReply.newBuilder(); + for (int i = 0; i < req.getMatchInfoCount(); i++) { + reply.addCode(CheckReply.Code.OK); + } + return CompletableFuture.completedFuture(reply.build()); + }); bgTaskExecutor = Executors.newSingleThreadScheduledExecutor(); AgentHostOptions agentHostOpts = AgentHostOptions.builder() .addr("127.0.0.1") @@ -108,8 +121,11 @@ public void setup() { distClient = IDistClient.newBuilder().trafficService(trafficService).build(); KVRangeStoreOptions kvRangeStoreOptions = new KVRangeStoreOptions(); - kvRangeStoreOptions.setDataEngineConfigurator(new InMemKVEngineConfigurator()); - kvRangeStoreOptions.setWalEngineConfigurator(new InMemKVEngineConfigurator()); + Struct memConf = Struct.newBuilder().build(); + 
kvRangeStoreOptions.setDataEngineType("memory"); + kvRangeStoreOptions.setDataEngineConf(memConf); + kvRangeStoreOptions.setWalEngineType("memory"); + kvRangeStoreOptions.setWalEngineConf(memConf); workerClient = IBaseKVStoreClient .newBuilder() diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorker.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorker.java index 3dfc574a0..3efd55941 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorker.java +++ b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorker.java @@ -70,10 +70,8 @@ public DistWorker(DistWorkerBuilder builder) { builder.subBrokerManager, this.messageDeliverer, builder.settingProvider, - builder.loadEstimateWindow, builder.fanoutParallelism, - builder.inlineFanoutThreshold, - builder.fanoutSplitThreshold); + builder.inlineFanoutThreshold); Map loadedFactories = BaseHookLoader.load(IDistWorkerBalancerFactory.class); for (String factoryName : builder.balancerFactoryConfig.keySet()) { if (!loadedFactories.containsKey(factoryName)) { diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorkerBuilder.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorkerBuilder.java index a735dc366..8bb8a16f4 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorkerBuilder.java +++ b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorkerBuilder.java @@ -40,7 +40,6 @@ import org.apache.bifromq.plugin.subbroker.ISubBrokerManager; import org.apache.bifromq.sysprops.props.DistFanOutParallelism; import org.apache.bifromq.sysprops.props.DistInlineFanOutThreshold; -import org.apache.bifromq.sysprops.props.DistWorkerFanOutSplitThreshold; /** * The builder for building Dist Worker. 
@@ -69,9 +68,7 @@ public class DistWorkerBuilder { Duration zombieProbeDelay = Duration.ofSeconds(15); Duration balancerRetryDelay = Duration.ofSeconds(5); Map balancerFactoryConfig = new HashMap<>(); - Duration loadEstimateWindow = Duration.ofSeconds(5); int fanoutParallelism = DistFanOutParallelism.INSTANCE.get(); - int fanoutSplitThreshold = DistWorkerFanOutSplitThreshold.INSTANCE.get(); int inlineFanoutThreshold = DistInlineFanOutThreshold.INSTANCE.get(); Map attributes = new HashMap<>(); diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorkerCoProc.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorkerCoProc.java index c811d684c..19b2da1c5 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorkerCoProc.java +++ b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorkerCoProc.java @@ -59,10 +59,10 @@ import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVIterator; import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; -import org.apache.bifromq.basekv.store.api.IKVReader; +import org.apache.bifromq.basekv.store.api.IKVRangeReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.basekv.store.api.IKVWriter; import org.apache.bifromq.basekv.store.proto.ROCoProcInput; import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; @@ -103,7 +103,7 @@ @Slf4j class DistWorkerCoProc implements IKVRangeCoProc { - private final Supplier readerProvider; + private final Supplier readerProvider; private final ISubscriptionCache routeCache; private final ITenantsStats tenantsState; private final IDeliverExecutorGroup deliverExecutorGroup; @@ -112,12 +112,12 @@ class 
DistWorkerCoProc implements IKVRangeCoProc { private transient Boundary boundary; public DistWorkerCoProc(KVRangeId id, - Supplier readerProvider, + Supplier refreshableReaderProvider, ISubscriptionCache routeCache, ITenantsStats tenantsState, IDeliverExecutorGroup deliverExecutorGroup, ISubscriptionCleaner subscriptionChecker) { - this.readerProvider = readerProvider; + this.readerProvider = refreshableReaderProvider; this.routeCache = routeCache; this.tenantsState = tenantsState; this.deliverExecutorGroup = deliverExecutorGroup; @@ -125,7 +125,7 @@ public DistWorkerCoProc(KVRangeId id, } @Override - public CompletableFuture query(ROCoProcInput input, IKVReader reader) { + public CompletableFuture query(ROCoProcInput input, IKVRangeReader reader) { try { DistServiceROCoProcInput coProcInput = input.getDistService(); switch (coProcInput.getInputCase()) { @@ -155,7 +155,10 @@ public CompletableFuture query(ROCoProcInput input, IKVReader re @SneakyThrows @Override - public Supplier mutate(RWCoProcInput input, IKVReader reader, IKVWriter writer, boolean isLeader) { + public Supplier mutate(RWCoProcInput input, + IKVRangeReader reader, + IKVWriter writer, + boolean isLeader) { DistServiceRWCoProcInput coProcInput = input.getDistService(); log.trace("Receive rw co-proc request\n{}", coProcInput); // tenantId -> topicFilter @@ -200,14 +203,13 @@ public Supplier mutate(RWCoProcInput input, IKVReader reader, IK })); } afterMutate.get().run(); - refreshFact(reader, addedMatches, removedMatches, + refreshFact(addedMatches, removedMatches, coProcInput.getTypeCase() == DistServiceRWCoProcInput.TypeCase.BATCHMATCH); return new MutationResult(output, Optional.of(Any.pack(fact))); }; } - private void refreshFact(IKVReader reader, - NavigableMap>> added, + private void refreshFact(NavigableMap>> added, NavigableMap>> removed, boolean isAdd) { if (added.isEmpty() && removed.isEmpty()) { @@ -246,9 +248,10 @@ private void refreshFact(IKVReader reader, } } if (needRefresh) { - 
reader.refresh(); - IKVIterator itr = reader.iterator(); - setFact(itr); + try (IKVRangeRefreshableReader reader = readerProvider.get(); IKVIterator itr = reader.iterator()) { + reader.refresh(); + setFact(itr); + } } } @@ -280,10 +283,9 @@ private void setFact(IKVIterator itr) { @Override public Any reset(Boundary boundary) { tenantsState.reset(); - try (IKVCloseableReader reader = readerProvider.get()) { + try (IKVRangeRefreshableReader reader = readerProvider.get(); IKVIterator itr = reader.iterator()) { this.boundary = boundary; routeCache.reset(boundary); - IKVIterator itr = reader.iterator(); setFact(itr); } return Any.pack(fact); @@ -301,7 +303,7 @@ public void close() { } private Runnable batchAddRoute(BatchMatchRequest request, - IKVReader reader, + IKVRangeReader reader, IKVWriter writer, boolean isLeader, Map>> newMatches, @@ -412,7 +414,7 @@ private Runnable batchAddRoute(BatchMatchRequest request, } private Runnable batchRemoveRoute(BatchUnmatchRequest request, - IKVReader reader, + IKVRangeReader reader, IKVWriter writer, boolean isLeader, Map>> removedMatches, @@ -550,152 +552,152 @@ private CompletableFuture batchDist(BatchDistRequest request) { }); } - private CompletableFuture gc(GCRequest request, IKVReader reader) { - reader.refresh(); + private CompletableFuture gc(GCRequest request, IKVRangeReader reader) { int stepUsed = Math.max(request.getStepHint(), 1); int scanQuota = request.getScanQuota() > 0 ? 
request.getScanQuota() : 256 * stepUsed; // subBrokerId -> delivererKey -> tenantId-> CheckRequest Map>> checkRequestBuilders = new HashMap<>(); - IKVIterator itr = reader.iterator(); - // clamp start key to current boundary when provided - if (request.hasStartKey()) { - ByteString startKey = request.getStartKey(); - if (boundary != null) { - ByteString startBoundary = BoundaryUtil.startKey(boundary); - ByteString endBoundary = BoundaryUtil.endKey(boundary); - if (BoundaryUtil.compareStartKey(startKey, startBoundary) < 0) { - startKey = startBoundary; - } else if (BoundaryUtil.compareEndKeys(startKey, endBoundary) >= 0) { - // clamp to start when beyond end - startKey = startBoundary; + try (IKVIterator itr = reader.iterator()) { + // clamp start key to current boundary when provided + if (request.hasStartKey()) { + ByteString startKey = request.getStartKey(); + if (boundary != null) { + ByteString startBoundary = BoundaryUtil.startKey(boundary); + ByteString endBoundary = BoundaryUtil.endKey(boundary); + if (BoundaryUtil.compareStartKey(startKey, startBoundary) < 0) { + startKey = startBoundary; + } else if (BoundaryUtil.compareEndKeys(startKey, endBoundary) >= 0) { + // clamp to start when beyond end + startKey = startBoundary; + } + } + if (startKey != null) { + itr.seek(startKey); + } else { + itr.seekToFirst(); } - } - if (startKey != null) { - itr.seek(startKey); } else { itr.seekToFirst(); } - } else { - itr.seekToFirst(); - } - // if range is empty, return immediately - if (!itr.isValid()) { - return CompletableFuture.completedFuture(GCReply.newBuilder() - .setReqId(request.getReqId()) - .setInspectedCount(0) - .setRemoveSuccess(0) - .setWrapped(false) - .build()); - } + // if range is empty, return immediately + if (!itr.isValid()) { + return CompletableFuture.completedFuture(GCReply.newBuilder() + .setReqId(request.getReqId()) + .setInspectedCount(0) + .setRemoveSuccess(0) + .setWrapped(false) + .build()); + } - AtomicInteger inspectedCount = new 
AtomicInteger(); - AtomicBoolean wrapped = new AtomicBoolean(false); - ByteString sessionStartKey = null; + AtomicInteger inspectedCount = new AtomicInteger(); + AtomicBoolean wrapped = new AtomicBoolean(false); + ByteString sessionStartKey = null; - outer: - while (true) { - if (!itr.isValid()) { - // reach tail - if (!wrapped.get()) { - itr.seekToFirst(); - if (!itr.isValid()) { - break; // still empty + outer: + while (true) { + if (!itr.isValid()) { + // reach tail + if (!wrapped.get()) { + itr.seekToFirst(); + if (!itr.isValid()) { + break; // still empty + } + wrapped.set(true); + } else { + break; } - wrapped.set(true); - } else { - break; } - } - ByteString currentKey = itr.key(); - // stop if met sessionStartKey after wrap before decoding - if (wrapped.get() && currentKey.equals(sessionStartKey)) { - break; - } + ByteString currentKey = itr.key(); + // stop if met sessionStartKey after wrap before decoding + if (wrapped.get() && currentKey.equals(sessionStartKey)) { + break; + } - if (sessionStartKey == null) { - sessionStartKey = currentKey; - } - Matching matching = buildMatchRoute(currentKey, itr.value()); - switch (matching.type()) { - case Normal -> { - if (!routeCache.isCached(matching.tenantId(), matching.matcher.getFilterLevelList())) { - NormalMatching normalMatching = ((NormalMatching) matching); - checkRequestBuilders.computeIfAbsent(normalMatching.subBrokerId(), k -> new HashMap<>()) - .computeIfAbsent(normalMatching.delivererKey(), k -> new HashMap<>()) - .computeIfAbsent(normalMatching.tenantId(), k -> CheckRequest.newBuilder() - .setTenantId(k) - .setDelivererKey(normalMatching.delivererKey())) - .addMatchInfo(((NormalMatching) matching).matchInfo()); - } + if (sessionStartKey == null) { + sessionStartKey = currentKey; } - case Group -> { - GroupMatching groupMatching = ((GroupMatching) matching); - if (!routeCache.isCached(groupMatching.tenantId(), matching.matcher.getFilterLevelList())) { - for (NormalMatching normalMatching : 
groupMatching.receiverList) { + Matching matching = buildMatchRoute(currentKey, itr.value()); + switch (matching.type()) { + case Normal -> { + if (!routeCache.isCached(matching.tenantId(), matching.matcher.getFilterLevelList())) { + NormalMatching normalMatching = ((NormalMatching) matching); checkRequestBuilders.computeIfAbsent(normalMatching.subBrokerId(), k -> new HashMap<>()) .computeIfAbsent(normalMatching.delivererKey(), k -> new HashMap<>()) .computeIfAbsent(normalMatching.tenantId(), k -> CheckRequest.newBuilder() .setTenantId(k) .setDelivererKey(normalMatching.delivererKey())) - .addMatchInfo(normalMatching.matchInfo()); + .addMatchInfo(((NormalMatching) matching).matchInfo()); } } + case Group -> { + GroupMatching groupMatching = ((GroupMatching) matching); + if (!routeCache.isCached(groupMatching.tenantId(), matching.matcher.getFilterLevelList())) { + for (NormalMatching normalMatching : groupMatching.receiverList) { + checkRequestBuilders.computeIfAbsent(normalMatching.subBrokerId(), k -> new HashMap<>()) + .computeIfAbsent(normalMatching.delivererKey(), k -> new HashMap<>()) + .computeIfAbsent(normalMatching.tenantId(), k -> CheckRequest.newBuilder() + .setTenantId(k) + .setDelivererKey(normalMatching.delivererKey())) + .addMatchInfo(normalMatching.matchInfo()); + } + } + } + default -> { + // never happen + } } - default -> { - // never happen + inspectedCount.incrementAndGet(); + if (inspectedCount.get() >= scanQuota) { + // stop by quota + itr.next(); + break; } - } - inspectedCount.incrementAndGet(); - if (inspectedCount.get() >= scanQuota) { - // stop by quota + + // skip over stepUsed-1 entries + int skip = stepUsed - 1; + while (skip-- > 0) { + itr.next(); + if (!itr.isValid()) { + // let outer loop handle wrap or stop + continue outer; + } + } + // move to next for next iteration itr.next(); - break; } - // skip over stepUsed-1 entries - int skip = stepUsed - 1; - while (skip-- > 0) { - itr.next(); - if (!itr.isValid()) { - // let outer loop 
handle wrap or stop - continue outer; + // aggregate sweep results + List> checkFutures = new ArrayList<>(); + for (int subBrokerId : checkRequestBuilders.keySet()) { + for (String delivererKey : checkRequestBuilders.get(subBrokerId).keySet()) { + for (Map.Entry entry : checkRequestBuilders.get(subBrokerId) + .get(delivererKey).entrySet()) { + checkFutures.add(subscriptionChecker.sweep(subBrokerId, entry.getValue().build())); + } } } - // move to next for next iteration - itr.next(); - } - // aggregate sweep results - List> checkFutures = new ArrayList<>(); - for (int subBrokerId : checkRequestBuilders.keySet()) { - for (String delivererKey : checkRequestBuilders.get(subBrokerId).keySet()) { - for (Map.Entry entry : checkRequestBuilders.get(subBrokerId) - .get(delivererKey).entrySet()) { - checkFutures.add(subscriptionChecker.sweep(subBrokerId, entry.getValue().build())); + CompletableFuture all = CompletableFuture.allOf(checkFutures.toArray(CompletableFuture[]::new)); + return all.thenApply(v -> { + int success = 0; + for (CompletableFuture f : checkFutures) { + success += f.join().success(); } - } + GCReply.Builder reply = GCReply.newBuilder() + .setReqId(request.getReqId()) + .setInspectedCount(inspectedCount.get()) + .setRemoveSuccess(success) + .setWrapped(wrapped.get()); + if (itr.isValid()) { + reply.setNextStartKey(itr.key()); + } + return reply.build(); + }); } - - CompletableFuture all = CompletableFuture.allOf(checkFutures.toArray(CompletableFuture[]::new)); - return all.thenApply(v -> { - int success = 0; - for (CompletableFuture f : checkFutures) { - success += f.join().success(); - } - GCReply.Builder reply = GCReply.newBuilder() - .setReqId(request.getReqId()) - .setInspectedCount(inspectedCount.get()) - .setRemoveSuccess(success) - .setWrapped(wrapped.get()); - if (itr.isValid()) { - reply.setNextStartKey(itr.key()); - } - return reply.build(); - }); } private record GlobalTopicFilter(String tenantId, RouteMatcher routeMatcher) { diff --git 
a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorkerCoProcFactory.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorkerCoProcFactory.java index 7ed91855e..a24b3e4dd 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorkerCoProcFactory.java +++ b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DistWorkerCoProcFactory.java @@ -22,9 +22,6 @@ import com.google.common.util.concurrent.MoreExecutors; import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics; -import java.time.Duration; -import java.util.List; -import java.util.Optional; import java.util.concurrent.ExecutorService; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.ForkJoinWorkerThread; @@ -33,17 +30,14 @@ import java.util.function.Supplier; import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; import org.apache.bifromq.basekv.store.api.IKVRangeCoProcFactory; -import org.apache.bifromq.basekv.store.api.IKVRangeSplitHinter; -import org.apache.bifromq.basekv.store.range.hinter.MutationKVLoadBasedSplitHinter; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.apache.bifromq.deliverer.IMessageDeliverer; import org.apache.bifromq.dist.client.IDistClient; import org.apache.bifromq.dist.worker.cache.ISubscriptionCache; import org.apache.bifromq.dist.worker.cache.SubscriptionCache; -import org.apache.bifromq.dist.worker.hinter.FanoutSplitHinter; import org.apache.bifromq.plugin.eventcollector.IEventCollector; import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler; import org.apache.bifromq.plugin.settingprovider.ISettingProvider; @@ 
-58,10 +52,8 @@ public class DistWorkerCoProcFactory implements IKVRangeCoProcFactory { private final ISettingProvider settingProvider; private final ISubscriptionCleaner subscriptionChecker; private final ExecutorService matchExecutor; - private final Duration loadEstWindow; private final int fanoutParallelism; private final int inlineFanOutThreshold; - private final int fanoutSplitThreshold; public DistWorkerCoProcFactory(IDistClient distClient, IEventCollector eventCollector, @@ -69,17 +61,13 @@ public DistWorkerCoProcFactory(IDistClient distClient, ISubBrokerManager subBrokerManager, IMessageDeliverer messageDeliverer, ISettingProvider settingProvider, - Duration loadEstimateWindow, int fanoutParallelism, - int inlineFanOutThreshold, - int fanoutSplitThreshold) { + int inlineFanOutThreshold) { this.eventCollector = eventCollector; this.resourceThrottler = resourceThrottler; - this.loadEstWindow = loadEstimateWindow; this.deliverer = messageDeliverer; this.settingProvider = settingProvider; this.fanoutParallelism = fanoutParallelism; - this.fanoutSplitThreshold = fanoutSplitThreshold; this.inlineFanOutThreshold = inlineFanOutThreshold; subscriptionChecker = new SubscriptionCleaner(subBrokerManager, distClient); @@ -97,19 +85,9 @@ public ForkJoinWorkerThread newThread(ForkJoinPool pool) { }, null, false), "topic-matcher"); } - @Override - public List createHinters(String clusterId, String storeId, KVRangeId id, - Supplier readerProvider) { - return List.of( - new FanoutSplitHinter(readerProvider, fanoutSplitThreshold, - "clusterId", clusterId, "storeId", storeId, "rangeId", KVRangeIdUtil.toString(id)), - new MutationKVLoadBasedSplitHinter(loadEstWindow, Optional::of, - "clusterId", clusterId, "storeId", storeId, "rangeId", KVRangeIdUtil.toString(id))); - } - @Override public IKVRangeCoProc createCoProc(String clusterId, String storeId, KVRangeId id, - Supplier rangeReaderProvider) { + Supplier rangeReaderProvider) { ISubscriptionCache routeCache = new 
SubscriptionCache(id, rangeReaderProvider, settingProvider, eventCollector, matchExecutor); ITenantsStats tenantsState = new TenantsStats(rangeReaderProvider, diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/TenantsStats.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/TenantsStats.java index 60c2ed316..336f88663 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/TenantsStats.java +++ b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/TenantsStats.java @@ -35,28 +35,27 @@ import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.StampedLock; import java.util.function.Supplier; import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVIterator; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; @Slf4j class TenantsStats implements ITenantsStats { private final Map tenantStatsMap = new ConcurrentHashMap<>(); - private final Supplier readerSupplier; + private final Supplier readerSupplier; private final String[] tags; // ultra-simple async queue and single drainer private final ConcurrentLinkedQueue taskQueue = new ConcurrentLinkedQueue<>(); private final AtomicBoolean draining = new AtomicBoolean(false); - private transient Boundary boundary; + private final AtomicBoolean closed = new AtomicBoolean(false); + private final StampedLock closeLock = new StampedLock(); - TenantsStats(Supplier readerSupplier, String... tags) { + TenantsStats(Supplier readerSupplier, String... 
tags) { this.readerSupplier = readerSupplier; this.tags = tags; - try (IKVCloseableReader reader = readerSupplier.get()) { - boundary = reader.boundary(); - } } @Override @@ -121,21 +120,34 @@ public void reset() { @Override public void close() { - CompletableFuture closeFuture = new CompletableFuture<>(); - taskQueue.offer(() -> { - tenantStatsMap.values().forEach(TenantStats::destroy); - tenantStatsMap.clear(); - closeFuture.complete(null); - }); - trigger(); - closeFuture.join(); + long stamp = closeLock.writeLock(); + try { + if (closed.compareAndSet(false, true)) { + CompletableFuture closeFuture = new CompletableFuture<>(); + taskQueue.offer(() -> { + tenantStatsMap.values().forEach(TenantStats::destroy); + tenantStatsMap.clear(); + closeFuture.complete(null); + }); + trigger(); + closeFuture.join(); + } + } finally { + closeLock.unlock(stamp); + } } private Supplier getSpaceUsageProvider(String tenantId) { return () -> { - try (IKVCloseableReader reader = readerSupplier.get()) { + long stamp = closeLock.readLock(); + if (closed.get()) { + closeLock.unlock(stamp); + return 0; + } + try (IKVRangeRefreshableReader reader = readerSupplier.get()) { ByteString tenantStartKey = tenantBeginKey(tenantId); - Boundary tenantSection = intersect(boundary, toBoundary(tenantStartKey, upperBound(tenantStartKey))); + Boundary tenantSection = intersect(reader.boundary(), + toBoundary(tenantStartKey, upperBound(tenantStartKey))); if (isNULLRange(tenantSection)) { return 0; } @@ -143,6 +155,8 @@ private Supplier getSpaceUsageProvider(String tenantId) { } catch (Exception e) { log.error("Unexpected error", e); return 0; + } finally { + closeLock.unlock(stamp); } }; } @@ -213,13 +227,11 @@ private void drain() { } private void doReset() { - try (IKVCloseableReader reader = readerSupplier.get()) { + try (IKVRangeRefreshableReader reader = readerSupplier.get(); IKVIterator itr = reader.iterator()) { tenantStatsMap.values().forEach(TenantStats::destroy); tenantStatsMap.clear(); 
reader.refresh(); - boundary = reader.boundary(); // enqueue full reload task; don't block caller - IKVIterator itr = reader.iterator(); for (itr.seekToFirst(); itr.isValid(); itr.next()) { String tenantId = parseTenantId(itr.key()); byte flag = parseFlag(itr.key()); diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/SubscriptionCache.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/SubscriptionCache.java index 89fb89429..dd2e20dea 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/SubscriptionCache.java +++ b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/SubscriptionCache.java @@ -43,7 +43,7 @@ import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.apache.bifromq.dist.worker.cache.task.RefreshEntriesTask; import org.apache.bifromq.dist.worker.schema.cache.Matching; @@ -62,7 +62,7 @@ public class SubscriptionCache implements ISubscriptionCache { private volatile Boundary boundary; public SubscriptionCache(KVRangeId id, - Supplier rangeReaderProvider, + Supplier rangeReaderProvider, ISettingProvider settingProvider, IEventCollector eventCollector, Executor matchExecutor) { diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/TenantRouteCacheFactory.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/TenantRouteCacheFactory.java index 00104f731..b305a0a17 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/TenantRouteCacheFactory.java +++ 
b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/TenantRouteCacheFactory.java @@ -26,7 +26,7 @@ import java.util.concurrent.Executor; import java.util.function.Supplier; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.plugin.eventcollector.IEventCollector; import org.apache.bifromq.plugin.settingprovider.ISettingProvider; @@ -34,12 +34,12 @@ class TenantRouteCacheFactory implements ITenantRouteCacheFactory { private final ISettingProvider settingProvider; private final IEventCollector eventCollector; private final Executor matchExecutor; - private final ThreadLocalKVReader threadLocalReader; + private final Supplier readerSupplier; private final Timer internalMatchTimer; private final Duration expiry; private final Duration fanoutCheckInterval; - public TenantRouteCacheFactory(Supplier readerSupplier, + public TenantRouteCacheFactory(Supplier readerSupplier, ISettingProvider settingProvider, IEventCollector eventCollector, Duration expiry, @@ -49,7 +49,7 @@ public TenantRouteCacheFactory(Supplier readerSupplier, this.settingProvider = settingProvider; this.eventCollector = eventCollector; this.matchExecutor = matchExecutor; - this.threadLocalReader = new ThreadLocalKVReader(readerSupplier); + this.readerSupplier = readerSupplier; this.expiry = expiry; this.fanoutCheckInterval = fanoutCheckInterval; internalMatchTimer = Timer.builder("dist.match.internal") @@ -66,13 +66,12 @@ public Duration expiry() { @Override public ITenantRouteCache create(KVRangeId rangeId, String tenantId) { return new TenantRouteCache(rangeId, tenantId, - new TenantRouteMatcher(tenantId, threadLocalReader, eventCollector, internalMatchTimer), + new TenantRouteMatcher(tenantId, readerSupplier, eventCollector, internalMatchTimer), settingProvider, expiry, fanoutCheckInterval, matchExecutor); } 
@Override public void close() { - threadLocalReader.close(); Metrics.globalRegistry.remove(internalMatchTimer); } } diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/TenantRouteMatcher.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/TenantRouteMatcher.java index 5a3ee9409..611ba4ad1 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/TenantRouteMatcher.java +++ b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/TenantRouteMatcher.java @@ -38,7 +38,7 @@ import java.util.function.Supplier; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.store.api.IKVIterator; -import org.apache.bifromq.basekv.store.api.IKVReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.dist.trie.ITopicFilterIterator; import org.apache.bifromq.dist.trie.ThreadLocalTopicFilterIterator; import org.apache.bifromq.dist.trie.TopicTrieNode; @@ -51,11 +51,11 @@ class TenantRouteMatcher implements ITenantRouteMatcher { private final String tenantId; private final Timer timer; - private final Supplier kvReaderSupplier; + private final Supplier kvReaderSupplier; private final IEventCollector eventCollector; public TenantRouteMatcher(String tenantId, - Supplier kvReaderSupplier, + Supplier kvReaderSupplier, IEventCollector eventCollector, Timer timer) { this.tenantId = tenantId; @@ -77,85 +77,86 @@ public Map matchAll(Set topics, new MatchedRoutes(tenantId, topic, eventCollector, maxPersistentFanoutCount, maxGroupFanoutCount)); }); - IKVReader rangeReader = kvReaderSupplier.get(); - rangeReader.refresh(); - - ByteString tenantStartKey = tenantBeginKey(tenantId); - Boundary tenantBoundary = - intersect(toBoundary(tenantStartKey, upperBound(tenantStartKey)), rangeReader.boundary()); - if (isNULLRange(tenantBoundary)) { - return matchedRoutes; - } - try 
(ITopicFilterIterator expansionSetItr = - ThreadLocalTopicFilterIterator.get(topicTrieBuilder.build())) { - expansionSetItr.init(topicTrieBuilder.build()); - Map, Set> matchedTopicFilters = new HashMap<>(); - IKVIterator itr = rangeReader.iterator(); - // track seek - itr.seek(tenantBoundary.getStartKey()); - int probe = 0; - while (itr.isValid() && compare(itr.key(), tenantBoundary.getEndKey()) < 0) { - // track itr.key() - Matching matching = buildMatchRoute(itr.key(), itr.value()); - // key: topic - Set matchedTopics = matchedTopicFilters.get(matching.matcher.getFilterLevelList()); - if (matchedTopics == null) { - List seekTopicFilter = matching.matcher.getFilterLevelList(); - expansionSetItr.seek(seekTopicFilter); - if (expansionSetItr.isValid()) { - List topicFilterToMatch = expansionSetItr.key(); - if (topicFilterToMatch.equals(seekTopicFilter)) { - Set backingTopics = new HashSet<>(); - for (Set topicSet : expansionSetItr.value().values()) { - for (String topic : topicSet) { - MatchedRoutes matchResult = (MatchedRoutes) matchedRoutes.computeIfAbsent(topic, - k -> new MatchedRoutes(tenantId, k, eventCollector, maxPersistentFanoutCount, - maxGroupFanoutCount)); - switch (matching.type()) { - case Normal -> matchResult.addNormalMatching((NormalMatching) matching); - case Group -> matchResult.putGroupMatching((GroupMatching) matching); - default -> { - // never happen + try (IKVRangeRefreshableReader rangeReader = kvReaderSupplier.get()) { + ByteString tenantStartKey = tenantBeginKey(tenantId); + Boundary tenantBoundary = + intersect(toBoundary(tenantStartKey, upperBound(tenantStartKey)), rangeReader.boundary()); + if (isNULLRange(tenantBoundary)) { + return matchedRoutes; + } + TopicTrieNode topicTrieNode = topicTrieBuilder.build(); + try (ITopicFilterIterator expansionSetItr = ThreadLocalTopicFilterIterator.get(topicTrieNode); + IKVIterator itr = rangeReader.iterator(tenantBoundary) + ) { + expansionSetItr.init(topicTrieNode); + Map, Set> matchedTopicFilters = 
new HashMap<>(); + // track seek + itr.seek(tenantBoundary.getStartKey()); + int probe = 0; + while (itr.isValid() && compare(itr.key(), tenantBoundary.getEndKey()) < 0) { + // track itr.key() + Matching matching = buildMatchRoute(itr.key(), itr.value()); + // key: topic + Set matchedTopics = matchedTopicFilters.get(matching.matcher.getFilterLevelList()); + if (matchedTopics == null) { + List seekTopicFilter = matching.matcher.getFilterLevelList(); + expansionSetItr.seek(seekTopicFilter); + if (expansionSetItr.isValid()) { + List topicFilterToMatch = expansionSetItr.key(); + if (topicFilterToMatch.equals(seekTopicFilter)) { + Set backingTopics = new HashSet<>(); + for (Set topicSet : expansionSetItr.value().values()) { + for (String topic : topicSet) { + MatchedRoutes matchResult = (MatchedRoutes) matchedRoutes.computeIfAbsent(topic, + k -> new MatchedRoutes(tenantId, k, eventCollector, + maxPersistentFanoutCount, + maxGroupFanoutCount)); + switch (matching.type()) { + case Normal -> matchResult.addNormalMatching((NormalMatching) matching); + case Group -> matchResult.putGroupMatching((GroupMatching) matching); + default -> { + // never happen + } } + backingTopics.add(topic); } - backingTopics.add(topic); } - } - matchedTopicFilters.put(matching.matcher.getFilterLevelList(), backingTopics); - itr.next(); - probe = 0; - } else { - // next() is much cheaper than seek(), we probe following 20 entries - if (probe++ < 20) { - // probe next + matchedTopicFilters.put(matching.matcher.getFilterLevelList(), backingTopics); itr.next(); + probe = 0; } else { - // seek to match next topic filter - ByteString nextMatch = tenantRouteStartKey(tenantId, topicFilterToMatch); - itr.seek(nextMatch); + // next() is much cheaper than seek(), we probe following 20 entries + if (probe++ < 20) { + // probe next + itr.next(); + } else { + // seek to match next topic filter + ByteString nextMatch = tenantRouteStartKey(tenantId, topicFilterToMatch); + itr.seek(nextMatch); + } } + } else { + 
break; // no more topic filter to match, stop here } } else { - break; // no more topic filter to match, stop here - } - } else { - itr.next(); - for (String topic : matchedTopics) { - MatchedRoutes matchResult = (MatchedRoutes) matchedRoutes.computeIfAbsent(topic, - k -> new MatchedRoutes(tenantId, k, eventCollector, maxPersistentFanoutCount, - maxGroupFanoutCount)); - switch (matching.type()) { - case Normal -> matchResult.addNormalMatching((NormalMatching) matching); - case Group -> matchResult.putGroupMatching((GroupMatching) matching); - default -> { - // never happen + itr.next(); + for (String topic : matchedTopics) { + MatchedRoutes matchResult = (MatchedRoutes) matchedRoutes.computeIfAbsent(topic, + k -> new MatchedRoutes(tenantId, k, eventCollector, maxPersistentFanoutCount, + maxGroupFanoutCount)); + switch (matching.type()) { + case Normal -> matchResult.addNormalMatching((NormalMatching) matching); + case Group -> matchResult.putGroupMatching((GroupMatching) matching); + default -> { + // never happen + } } } } } + sample.stop(timer); + return matchedRoutes; } - sample.stop(timer); - return matchedRoutes; } } } diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/ThreadLocalKVReader.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/ThreadLocalKVReader.java index e43ba0ff9..61c154ebe 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/ThreadLocalKVReader.java +++ b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/cache/ThreadLocalKVReader.java @@ -14,35 +14,34 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.dist.worker.cache; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; -import org.apache.bifromq.basekv.store.api.IKVReader; import com.google.common.collect.Sets; import java.util.Set; import java.util.function.Supplier; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; -class ThreadLocalKVReader implements Supplier { - private final Set threadReaders = Sets.newConcurrentHashSet(); - private final ThreadLocal threadLocalReader; +class ThreadLocalKVReader implements Supplier { + private final Set threadReaders = Sets.newConcurrentHashSet(); + private final ThreadLocal threadLocalReader; - public ThreadLocalKVReader(Supplier readerProvider) { + public ThreadLocalKVReader(Supplier readerProvider) { this.threadLocalReader = ThreadLocal.withInitial(() -> { - IKVCloseableReader reader = readerProvider.get(); + IKVRangeRefreshableReader reader = readerProvider.get(); threadReaders.add(reader); return reader; }); } @Override - public IKVReader get() { + public IKVRangeRefreshableReader get() { return threadLocalReader.get(); } public void close() { - threadReaders.forEach(IKVCloseableReader::close); + threadReaders.forEach(IKVRangeRefreshableReader::close); } } diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/hinter/FanoutSplitHinter.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/hinter/FanoutSplitHinter.java index 7c9bc27dc..58885cf88 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/hinter/FanoutSplitHinter.java +++ b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/hinter/FanoutSplitHinter.java @@ -23,30 +23,27 @@ import static org.apache.bifromq.dist.worker.schema.KVSchemaUtil.toNormalRouteKey; import static org.apache.bifromq.dist.worker.schema.KVSchemaUtil.toReceiverUrl; -import org.apache.bifromq.basekv.proto.Boundary; -import 
org.apache.bifromq.basekv.proto.SplitHint; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; -import org.apache.bifromq.basekv.store.api.IKVIterator; -import org.apache.bifromq.basekv.store.api.IKVLoadRecord; -import org.apache.bifromq.basekv.store.api.IKVRangeSplitHinter; -import org.apache.bifromq.basekv.store.api.IKVReader; -import org.apache.bifromq.basekv.store.proto.ROCoProcInput; -import org.apache.bifromq.basekv.store.proto.RWCoProcInput; -import org.apache.bifromq.basekv.utils.BoundaryUtil; -import org.apache.bifromq.dist.rpc.proto.BatchMatchRequest; -import org.apache.bifromq.dist.rpc.proto.BatchUnmatchRequest; -import org.apache.bifromq.type.RouteMatcher; -import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Metrics; import java.util.HashMap; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Supplier; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.proto.SplitHint; +import org.apache.bifromq.basekv.store.api.IKVIterator; +import org.apache.bifromq.basekv.store.api.IKVRangeReader; +import org.apache.bifromq.basekv.store.proto.ROCoProcInput; +import org.apache.bifromq.basekv.store.proto.RWCoProcInput; +import org.apache.bifromq.basekv.store.range.hinter.IKVLoadRecord; +import org.apache.bifromq.basekv.store.range.hinter.IKVRangeSplitHinter; +import org.apache.bifromq.basekv.utils.BoundaryUtil; +import org.apache.bifromq.dist.rpc.proto.BatchMatchRequest; +import org.apache.bifromq.dist.rpc.proto.BatchUnmatchRequest; +import org.apache.bifromq.type.RouteMatcher; @Slf4j public class FanoutSplitHinter implements IKVRangeSplitHinter { @@ -54,25 +51,19 @@ public class FanoutSplitHinter implements IKVRangeSplitHinter { public static final String LOAD_TYPE_FANOUT_TOPIC_FILTERS = 
"fanout_topicfilters"; public static final String LOAD_TYPE_FANOUT_SCALE = "fanout_scale"; private final int splitAtScale; - private final Supplier readerSupplier; - private final Set threadReaders = Sets.newConcurrentHashSet(); - private final ThreadLocal threadLocalKVReader; + private final Supplier readerSupplier; // key: matchRecordKeyPrefix, value: splitKey private final Map fanoutSplitKeys = new ConcurrentHashMap<>(); private final Gauge fanOutTopicFiltersGauge; private final Gauge fanOutScaleGauge; private volatile Boundary boundary; - - public FanoutSplitHinter(Supplier readerSupplier, int splitAtScale, String... tags) { + public FanoutSplitHinter(Supplier readerSupplier, int splitAtScale, String... tags) { this.splitAtScale = splitAtScale; this.readerSupplier = readerSupplier; - threadLocalKVReader = ThreadLocal.withInitial(() -> { - IKVCloseableReader reader = readerSupplier.get(); - threadReaders.add(reader); - return reader; - }); - boundary = readerSupplier.get().boundary(); + try (IKVRangeReader reader = readerSupplier.get()) { + boundary = reader.boundary(); + } fanOutTopicFiltersGauge = Gauge.builder("dist.fanout.topicfilters", fanoutSplitKeys::size) .tags(tags) .register(Metrics.globalRegistry); @@ -173,40 +164,39 @@ public SplitHint estimate() { @Override public void close() { - threadReaders.forEach(IKVCloseableReader::close); Metrics.globalRegistry.remove(fanOutTopicFiltersGauge); Metrics.globalRegistry.remove(fanOutScaleGauge); } private void doEstimate(Map routeKeyLoads) { Map splitCandidate = new HashMap<>(); - routeKeyLoads.forEach((matchRecordKeyPrefix, recordEst) -> { - long dataSize = (threadLocalKVReader.get() - .size(Boundary.newBuilder() + try (IKVRangeReader reader = readerSupplier.get()) { + routeKeyLoads.forEach((matchRecordKeyPrefix, recordEst) -> { + long dataSize = (reader.size(Boundary.newBuilder() .setStartKey(matchRecordKeyPrefix) .setEndKey(BoundaryUtil.upperBound(matchRecordKeyPrefix)) .build())) - recordEst.tombstoneSize(); 
- long fanOutScale = dataSize / recordEst.avgRecordSize(); - if (fanOutScale >= splitAtScale) { - splitCandidate.put(matchRecordKeyPrefix, new RangeEstimation(dataSize, recordEst.avgRecordSize())); - } else if (fanoutSplitKeys.containsKey(matchRecordKeyPrefix) && fanOutScale < 0.5 * splitAtScale) { - fanoutSplitKeys.remove(matchRecordKeyPrefix); - } - }); - if (!splitCandidate.isEmpty()) { - try (IKVCloseableReader reader = readerSupplier.get()) { - for (ByteString routeKey : splitCandidate.keySet()) { - RangeEstimation recEst = splitCandidate.get(routeKey); - fanoutSplitKeys.computeIfAbsent(routeKey, k -> { - IKVIterator itr = reader.iterator(); - int i = 0; - for (itr.seek(routeKey); itr.isValid(); itr.next()) { - if (i++ >= splitAtScale) { - return new FanOutSplit(recEst, itr.key()); + long fanOutScale = dataSize / recordEst.avgRecordSize(); + if (fanOutScale >= splitAtScale) { + splitCandidate.put(matchRecordKeyPrefix, new RangeEstimation(dataSize, recordEst.avgRecordSize())); + } else if (fanoutSplitKeys.containsKey(matchRecordKeyPrefix) && fanOutScale < 0.5 * splitAtScale) { + fanoutSplitKeys.remove(matchRecordKeyPrefix); + } + }); + if (!splitCandidate.isEmpty()) { + try (IKVIterator itr = reader.iterator()) { + for (ByteString routeKey : splitCandidate.keySet()) { + RangeEstimation recEst = splitCandidate.get(routeKey); + fanoutSplitKeys.computeIfAbsent(routeKey, k -> { + int i = 0; + for (itr.seek(routeKey); itr.isValid(); itr.next()) { + if (i++ >= splitAtScale) { + return new FanOutSplit(recEst, itr.key()); + } } - } - return null; - }); + return null; + }); + } } } } diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/hinter/FanoutSplitHinterFactory.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/hinter/FanoutSplitHinterFactory.java new file mode 100644 index 000000000..f0e3aab15 --- /dev/null +++ 
b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/hinter/FanoutSplitHinterFactory.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.dist.worker.hinter; + +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import org.apache.bifromq.basekv.store.range.hinter.IKVRangeSplitHinter; +import org.apache.bifromq.basekv.store.range.hinter.IKVRangeSplitHinterFactory; +import org.apache.bifromq.basekv.store.range.hinter.SplitHinterContext; + +public class FanoutSplitHinterFactory implements IKVRangeSplitHinterFactory { + private static final String CONF_SPLIT_THRESHOLD = "splitThreshold"; + + @Override + public IKVRangeSplitHinter create(SplitHinterContext ctx, Struct conf) { + int threshold = 100000; // default + if (conf != null && conf.getFieldsOrDefault(CONF_SPLIT_THRESHOLD, Value.getDefaultInstance()).hasNumberValue()) { + threshold = (int) conf.getFieldsOrThrow(CONF_SPLIT_THRESHOLD).getNumberValue(); + } + return new FanoutSplitHinter(ctx.getReaderProvider(), threshold, ctx.getTags()); + } +} + diff --git 
a/bifromq-dist/bifromq-dist-worker/src/main/resources/META-INF/services/org.apache.bifromq.basekv.store.range.hinter.IKVRangeSplitHinterFactory b/bifromq-dist/bifromq-dist-worker/src/main/resources/META-INF/services/org.apache.bifromq.basekv.store.range.hinter.IKVRangeSplitHinterFactory new file mode 100644 index 000000000..42348e41d --- /dev/null +++ b/bifromq-dist/bifromq-dist-worker/src/main/resources/META-INF/services/org.apache.bifromq.basekv.store.range.hinter.IKVRangeSplitHinterFactory @@ -0,0 +1 @@ +org.apache.bifromq.dist.worker.hinter.FanoutSplitHinterFactory diff --git a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerCoProcGCTest.java b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerCoProcGCTest.java index ffde18994..90d74cd85 100644 --- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerCoProcGCTest.java +++ b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerCoProcGCTest.java @@ -40,8 +40,9 @@ import java.util.concurrent.CompletableFuture; import java.util.function.Supplier; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVIterator; +import org.apache.bifromq.basekv.store.api.IKVRangeReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.basekv.store.proto.ROCoProcInput; import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; import org.apache.bifromq.dist.rpc.proto.DistServiceROCoProcInput; @@ -61,8 +62,10 @@ public class DistWorkerCoProcGCTest { private ITenantsStats tenantsState; private IDeliverExecutorGroup deliverExecutorGroup; private ISubscriptionCleaner subscriptionChecker; - private Supplier readerProvider; - private IKVCloseableReader reader; + private Supplier readerProvider; + private Supplier refreshableReaderProvider; 
+ private IKVRangeReader reader; + private IKVRangeRefreshableReader refreshableReader; private DistWorkerCoProc coProc; private List currentData = new ArrayList<>(); @@ -81,13 +84,20 @@ public void setUp() { deliverExecutorGroup = mock(IDeliverExecutorGroup.class); subscriptionChecker = mock(ISubscriptionCleaner.class); readerProvider = mock(Supplier.class); - reader = mock(IKVCloseableReader.class); + refreshableReaderProvider = mock(Supplier.class); + reader = mock(IKVRangeReader.class); + refreshableReader = mock(IKVRangeRefreshableReader.class); when(reader.boundary()).thenReturn(FULL_BOUNDARY); when(readerProvider.get()).thenReturn(reader); when(reader.iterator()).thenReturn(new FakeIterator(java.util.List.of())); + + when(refreshableReader.boundary()).thenReturn(FULL_BOUNDARY); + when(refreshableReaderProvider.get()).thenReturn(refreshableReader); + when(refreshableReader.iterator()).thenReturn(new FakeIterator(java.util.List.of())); + when(routeCache.isCached(anyString(), anyList())).thenReturn(false); coProc = new DistWorkerCoProc(KVRangeId.newBuilder().setId(1).setEpoch(1).build(), - readerProvider, routeCache, tenantsState, deliverExecutorGroup, subscriptionChecker); + refreshableReaderProvider, routeCache, tenantsState, deliverExecutorGroup, subscriptionChecker); coProc.reset(FULL_BOUNDARY); coProc.onLeader(true); } @@ -287,5 +297,10 @@ public void seekForPrev(ByteString key) { } idx = (i >= 0) ? 
i : -1; } + + @Override + public void close() { + + } } } diff --git a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerCoProcTest.java b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerCoProcTest.java index 864f5b073..631298843 100644 --- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerCoProcTest.java +++ b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerCoProcTest.java @@ -48,9 +48,10 @@ import lombok.SneakyThrows; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVIterator; import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; +import org.apache.bifromq.basekv.store.api.IKVRangeReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.basekv.store.api.IKVWriter; import org.apache.bifromq.basekv.store.proto.ROCoProcInput; import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; @@ -86,8 +87,9 @@ public class DistWorkerCoProcTest { private ITenantsStats tenantsState; private IDeliverExecutorGroup deliverExecutorGroup; private ISubscriptionCleaner subscriptionChecker; - private Supplier readerProvider; - private IKVCloseableReader reader; + private Supplier refreshableReaderProvider; + private IKVRangeRefreshableReader refreshableReader; + private IKVRangeReader reader; private IKVWriter writer; private IKVIterator iterator; private KVRangeId rangeId; @@ -99,16 +101,20 @@ public void setUp() { tenantsState = mock(ITenantsStats.class); deliverExecutorGroup = mock(IDeliverExecutorGroup.class); subscriptionChecker = mock(ISubscriptionCleaner.class); - readerProvider = mock(Supplier.class); - reader = mock(IKVCloseableReader.class); + refreshableReaderProvider = mock(Supplier.class); + reader = 
mock(IKVRangeReader.class); + refreshableReader = mock(IKVRangeRefreshableReader.class); iterator = mock(IKVIterator.class); writer = mock(IKVWriter.class); rangeId = KVRangeId.newBuilder().setId(1).setEpoch(1).build(); - when(readerProvider.get()).thenReturn(reader); + when(refreshableReaderProvider.get()).thenReturn(refreshableReader); + when(refreshableReader.boundary()).thenReturn(FULL_BOUNDARY); + when(refreshableReader.iterator()).thenReturn(iterator); + when(reader.boundary()).thenReturn(FULL_BOUNDARY); when(reader.iterator()).thenReturn(iterator); when(iterator.isValid()).thenReturn(false); - distWorkerCoProc = new DistWorkerCoProc(rangeId, readerProvider, routeCache, tenantsState, deliverExecutorGroup, + distWorkerCoProc = new DistWorkerCoProc(rangeId, refreshableReaderProvider, routeCache, tenantsState, deliverExecutorGroup, subscriptionChecker); distWorkerCoProc.reset(FULL_BOUNDARY); distWorkerCoProc.onLeader(true); @@ -303,7 +309,7 @@ public void testAddRouteExceedFirstRouteTriggersRefresh() { verify(routeCache, atLeast(1)).refresh(any()); verify(tenantsState, times(1)).incNormalRoutes(eq("tenant0"), eq(1)); - verify(reader, times(1)).refresh(); + verify(refreshableReader, times(1)).refresh(); } @SneakyThrows @@ -346,7 +352,7 @@ public void testAddRouteExceedLastRouteTriggersRefresh() { verify(routeCache, atLeast(1)).refresh(any()); verify(tenantsState, times(1)).incNormalRoutes(eq("tenantC"), eq(1)); - verify(reader, times(1)).refresh(); + verify(refreshableReader, times(1)).refresh(); } @SneakyThrows @@ -389,7 +395,7 @@ public void testRemoveRouteAffectFirstRouteTriggersRefresh() { && m.get("tenantA").routes.keySet() .contains(TopicUtil.from("topicA")))); verify(tenantsState, times(1)).decNormalRoutes(eq("tenantA"), eq(1)); - verify(reader, times(1)).refresh(); + verify(refreshableReader, times(1)).refresh(); } @SneakyThrows @@ -431,7 +437,7 @@ public void testRemoveRouteAffectLastRouteTriggersRefresh() { verify(routeCache, times(1)).refresh(argThat(m 
-> m.containsKey("tenantB") && m.get("tenantB").routes.keySet().contains(TopicUtil.from("topicB")))); verify(tenantsState, times(1)).decNormalRoutes(eq("tenantB"), eq(1)); - verify(reader, times(1)).refresh(); + verify(refreshableReader, times(1)).refresh(); } @Test diff --git a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java index a75b6166f..b3ac82b99 100644 --- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java +++ b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java @@ -20,6 +20,9 @@ package org.apache.bifromq.dist.worker; import static org.apache.bifromq.basekv.client.KVRangeRouterUtil.findByKey; +import static org.apache.bifromq.basekv.localengine.StructUtil.toValue; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; import static org.apache.bifromq.dist.worker.schema.KVSchemaUtil.tenantBeginKey; import static org.apache.bifromq.dist.worker.schema.KVSchemaUtil.toGroupRouteKey; import static org.apache.bifromq.dist.worker.schema.KVSchemaUtil.toNormalRouteKey; @@ -40,6 +43,7 @@ import static org.testng.Assert.assertTrue; import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.simple.SimpleMeterRegistry; import java.io.File; @@ -68,8 +72,6 @@ import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.client.IBaseKVStoreClient; import org.apache.bifromq.basekv.client.KVRangeSetting; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngineConfigurator; 
import org.apache.bifromq.basekv.metaservice.IBaseKVMetaService; import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; import org.apache.bifromq.basekv.store.proto.KVRangeROReply; @@ -206,12 +208,18 @@ public void setup() { String uuid = UUID.randomUUID().toString(); KVRangeStoreOptions options = new KVRangeStoreOptions(); - ((RocksDBCPableKVEngineConfigurator) options.getDataEngineConfigurator()) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME, uuid) - .toString()) - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME, uuid).toString()); - ((RocksDBWALableKVEngineConfigurator) options.getWalEngineConfigurator()) - .dbRootDir(Paths.get(dbRootDir.toString(), DB_WAL_NAME, uuid).toString()); + Struct dataConf = options.getDataEngineConf().toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_NAME, uuid).toString())) + .putFields(DB_CHECKPOINT_ROOT_DIR, + toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME, uuid).toString())) + .build(); + options.setDataEngineType(options.getDataEngineType()); + options.setDataEngineConf(dataConf); + Struct walConf = options.getWalEngineConf().toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_WAL_NAME, uuid).toString())) + .build(); + options.setWalEngineType(options.getWalEngineType()); + options.setWalEngineConf(walConf); storeClient = IBaseKVStoreClient .newBuilder() diff --git a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/TenantsStatesTest.java b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/TenantsStatesTest.java index 6ff3df04e..2cef30e3a 100644 --- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/TenantsStatesTest.java +++ b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/TenantsStatesTest.java @@ -29,8 +29,8 @@ import java.util.function.Supplier; import lombok.SneakyThrows; -import 
org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVIterator; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.mockito.Mock; import org.mockito.MockitoAnnotations; import org.testng.annotations.AfterMethod; @@ -39,9 +39,9 @@ public class TenantsStatesTest extends MeterTest { @Mock - private Supplier readerSupplier; + private Supplier readerSupplier; @Mock - private IKVCloseableReader reader; + private IKVRangeRefreshableReader reader; @Mock private IKVIterator iterator; private AutoCloseable closeable; @@ -122,6 +122,9 @@ public void testClose() { assertGauge(tenantId, MqttRouteNumGauge); assertGauge(tenantId, MqttSharedSubNumGauge); + // ensure reader is created and closed via gauge sampling + assertGaugeValue(tenantId, MqttRouteSpaceGauge, 1); + tenantsState.close(); assertNoGauge(tenantId, MqttRouteSpaceGauge); assertNoGauge(tenantId, MqttRouteNumGauge); diff --git a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/cache/SubscriptionCacheTest.java b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/cache/SubscriptionCacheTest.java index 461a90f18..d6cd11f40 100644 --- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/cache/SubscriptionCacheTest.java +++ b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/cache/SubscriptionCacheTest.java @@ -46,10 +46,8 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; import java.util.concurrent.Executors; -import java.util.function.Supplier; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.apache.bifromq.dist.worker.Comparators; import org.apache.bifromq.dist.worker.cache.task.AddRoutesTask; @@ -66,7 +64,6 @@ public 
class SubscriptionCacheTest { private SubscriptionCache cache; private ITenantRouteCacheFactory tenantRouteCacheFactoryMock; private ITenantRouteCache tenantRouteCacheMock; - private Supplier readerSupplierMock; private Executor matchExecutor; private Ticker tickerMock; @@ -74,7 +71,6 @@ public class SubscriptionCacheTest { public void setUp() { tenantRouteCacheFactoryMock = mock(ITenantRouteCacheFactory.class); tenantRouteCacheMock = mock(ITenantRouteCache.class); - readerSupplierMock = mock(Supplier.class); matchExecutor = Executors.newSingleThreadExecutor(); tickerMock = mock(Ticker.class); diff --git a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/cache/TenantRouteMatcherTest.java b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/cache/TenantRouteMatcherTest.java index 9bf6938c3..060f05493 100644 --- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/cache/TenantRouteMatcherTest.java +++ b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/cache/TenantRouteMatcherTest.java @@ -44,8 +44,10 @@ import java.util.concurrent.CopyOnWriteArrayList; import java.util.function.Supplier; import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.proto.State; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.store.api.IKVIterator; -import org.apache.bifromq.basekv.store.api.IKVReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.basekv.utils.BoundaryUtil; import org.apache.bifromq.dist.rpc.proto.RouteGroup; import org.apache.bifromq.dist.worker.schema.cache.GroupMatching; @@ -247,7 +249,7 @@ public void isolateRoutesByTenant() { kvData.put(toNormalRouteKey(OTHER_TENANT, otherTenantRoute.matcher, otherTenantRoute.receiverUrl()), BSUtil.toByteString(otherTenantRoute.incarnation())); - Supplier readerSupplier = () -> new 
TreeMapKVReader(kvData); + Supplier readerSupplier = () -> new TreeMapKVReader(kvData); TenantRouteMatcher matcherTenant = new TenantRouteMatcher(TENANT_ID, readerSupplier, eventCollector, matchTimer); @@ -339,7 +341,7 @@ public void triggerGroupFanoutThrottling() { assertEquals(event.maxCount(), 1); } - private static final class TreeMapKVReader implements IKVReader { + private static final class TreeMapKVReader implements IKVRangeRefreshableReader { private final NavigableMap data; private TreeMapKVIterator lastIterator; @@ -347,6 +349,21 @@ private TreeMapKVReader(NavigableMap data) { this.data = data; } + @Override + public long version() { + return 0; + } + + @Override + public State state() { + return State.newBuilder().setType(State.StateType.Normal).build(); + } + + @Override + public long lastAppliedIndex() { + return 0; + } + @Override public Boundary boundary() { if (data.isEmpty()) { @@ -357,6 +374,11 @@ public Boundary boundary() { return toBoundary(start, end); } + @Override + public ClusterConfig clusterConfig() { + return ClusterConfig.newBuilder().build(); + } + @Override public long size(Boundary boundary) { if (data.isEmpty()) { @@ -390,6 +412,27 @@ public IKVIterator iterator() { return lastIterator; } + @Override + public IKVIterator iterator(Boundary boundary) { + // create iterator limited by boundary + ByteString start = BoundaryUtil.startKey(boundary); + ByteString end = BoundaryUtil.endKey(boundary); + NavigableMap sub = data; + if (start != null) { + sub = sub.tailMap(start, true); + } + if (end != null) { + sub = sub.headMap(end, false); + } + lastIterator = new TreeMapKVIterator(sub); + return lastIterator; + } + + @Override + public void close() { + + } + @Override public void refresh() { // no-op for in-memory stub @@ -473,6 +516,11 @@ public void seekForPrev(ByteString key) { seekCount++; } + @Override + public void close() { + + } + int getSeekCount() { return seekCount; } diff --git 
a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/cache/ThreadLocalKVReaderTest.java b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/cache/ThreadLocalKVReaderTest.java index 4fdb39e25..6d0d79143 100644 --- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/cache/ThreadLocalKVReaderTest.java +++ b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/cache/ThreadLocalKVReaderTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.dist.worker.cache; @@ -28,29 +28,28 @@ import static org.testng.Assert.assertSame; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; -import org.apache.bifromq.basekv.store.api.IKVReader; import java.util.HashSet; import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.function.Supplier; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class ThreadLocalKVReaderTest { - private Supplier readerSupplierMock; - private IKVCloseableReader readerMock1; - private IKVCloseableReader readerMock2; + private Supplier readerSupplierMock; + private IKVRangeRefreshableReader readerMock1; + private IKVRangeRefreshableReader readerMock2; private ThreadLocalKVReader threadLocalKVReader; @BeforeMethod void setUp() { readerSupplierMock = mock(Supplier.class); - readerMock1 = mock(IKVCloseableReader.class); - readerMock2 = mock(IKVCloseableReader.class); + readerMock1 = mock(IKVRangeRefreshableReader.class); + readerMock2 = 
mock(IKVRangeRefreshableReader.class); // 模拟 readerSupplier 返回不同的 reader 实例 when(readerSupplierMock.get()) @@ -62,16 +61,16 @@ void setUp() { @Test void singleThreadGet() { - IKVReader reader = threadLocalKVReader.get(); + IKVRangeRefreshableReader reader = threadLocalKVReader.get(); assertSame(readerMock1, reader); - IKVReader readerAgain = threadLocalKVReader.get(); + IKVRangeRefreshableReader readerAgain = threadLocalKVReader.get(); assertSame(readerMock1, readerAgain); } @Test void multiThreadGet() throws InterruptedException { - Set readers = new HashSet<>(); + Set readers = new HashSet<>(); CountDownLatch latch = new CountDownLatch(2); ExecutorService executorService = Executors.newFixedThreadPool(2); diff --git a/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto b/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto index 0d93c3c60..3aae19bdd 100644 --- a/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto +++ b/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto @@ -234,7 +234,23 @@ message InsertRequest{ } message BatchInsertRequest{ + message SubRef{ + repeated MatchedRoute matchedRoute = 1; + uint32 messagePackIndex = 2; + } + + message InsertRef{ + string tenantId = 1; + string inboxId = 2; + uint64 incarnation = 3; + repeated SubRef subRef = 4; + } + + // legacy non-compact format: will be removed in next major version repeated InsertRequest request = 1; + // new compact format + repeated commontype.TopicMessagePack topicMessagePack = 2; + repeated InsertRef insertRef = 3; } message InsertResult{ @@ -362,4 +378,4 @@ message InboxServiceROCoProcOutput{ GCReply gc = 7; BatchFetchInboxStateReply fetchInboxState = 8; } -} \ No newline at end of file +} diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/FetchSignalSender.java 
b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/FetchSignalSender.java new file mode 100644 index 000000000..dab48f853 --- /dev/null +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/FetchSignalSender.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.inbox.server; + +import io.micrometer.core.instrument.Metrics; +import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.ForkJoinWorkerThread; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.bifromq.baseenv.EnvProvider; + +class FetchSignalSender { + public static ExecutorService INSTANCE = ExecutorServiceMetrics.monitor(Metrics.globalRegistry, + new ForkJoinPool(Math.max(2, EnvProvider.INSTANCE.availableProcessors() / 4), + new ForkJoinPool.ForkJoinWorkerThreadFactory() { + final AtomicInteger index = new AtomicInteger(0); + + @Override + public ForkJoinWorkerThread newThread(ForkJoinPool pool) { + ForkJoinWorkerThread worker = ForkJoinPool.defaultForkJoinWorkerThreadFactory.newThread(pool); + worker.setName(String.format("fetch-signaler-%d", index.incrementAndGet())); + worker.setDaemon(true); + return worker; + } + }, null, false), "fetch-signaler"); +} diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/FetcherSignaler.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/FetcherSignaler.java index 9389a4146..16de6b6ee 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/FetcherSignaler.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/FetcherSignaler.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.server; @@ -29,10 +29,10 @@ public FetcherSignaler(IInboxFetcherRegistry registry) { } @Override - public void afterWrite(TenantInboxInstance tenantInboxInstance, String delivererKey) { + public void afterWrite(TenantInboxInstance tenantInboxInstance, String delivererKey, long now) { for (IInboxFetcher fetcher : registry.get(tenantInboxInstance.tenantId(), delivererKey)) { if (fetcher.signalFetch(tenantInboxInstance.instance().inboxId(), - tenantInboxInstance.instance().incarnation())) { + tenantInboxInstance.instance().incarnation(), now)) { break; } } diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/IInboxFetcher.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/IInboxFetcher.java index 8c754d65c..7994f2a48 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/IInboxFetcher.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/IInboxFetcher.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.server; @@ -26,7 +26,7 @@ public interface IInboxFetcher { String delivererKey(); - boolean signalFetch(String inboxId, long incarnation); + boolean signalFetch(String inboxId, long incarnation, long now); void close(); } diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxFetchPipeline.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxFetchPipeline.java index b80211acf..230012058 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxFetchPipeline.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxFetchPipeline.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.inbox.server; @@ -23,14 +23,6 @@ import static org.apache.bifromq.inbox.util.PipelineUtil.PIPELINE_ATTR_KEY_DELIVERERKEY; import static org.apache.bifromq.inbox.util.PipelineUtil.PIPELINE_ATTR_KEY_ID; -import org.apache.bifromq.baserpc.server.AckStream; -import org.apache.bifromq.basescheduler.exception.BackPressureException; -import org.apache.bifromq.basescheduler.exception.BatcherUnavailableException; -import org.apache.bifromq.inbox.rpc.proto.InboxFetchHint; -import org.apache.bifromq.inbox.rpc.proto.InboxFetched; -import org.apache.bifromq.inbox.server.scheduler.FetchRequest; -import org.apache.bifromq.inbox.storage.proto.BatchFetchRequest; -import org.apache.bifromq.inbox.storage.proto.Fetched; import io.grpc.stub.StreamObserver; import io.reactivex.rxjava3.disposables.Disposable; import java.util.Collections; @@ -44,6 +36,14 @@ import java.util.concurrent.atomic.AtomicLong; import lombok.ToString; import lombok.extern.slf4j.Slf4j; +import 
org.apache.bifromq.baserpc.server.AckStream; +import org.apache.bifromq.basescheduler.exception.BackPressureException; +import org.apache.bifromq.basescheduler.exception.BatcherUnavailableException; +import org.apache.bifromq.inbox.rpc.proto.InboxFetchHint; +import org.apache.bifromq.inbox.rpc.proto.InboxFetched; +import org.apache.bifromq.inbox.server.scheduler.FetchRequest; +import org.apache.bifromq.inbox.storage.proto.BatchFetchRequest; +import org.apache.bifromq.inbox.storage.proto.Fetched; @Slf4j final class InboxFetchPipeline extends AckStream implements IInboxFetcher { @@ -123,15 +123,15 @@ public void send(InboxFetched message) { } @Override - public boolean signalFetch(String inboxId, long incarnation) { + public boolean signalFetch(String inboxId, long incarnation, long now) { log.trace("Signal fetch: tenantId={}, inboxId={}", tenantId, inboxId); // signal fetch won't refresh expiry Set sessionIds = inboxSessionMap.getOrDefault(new InboxId(inboxId, incarnation), Collections.emptySet()); for (Long sessionId : sessionIds) { FetchState fetchState = inboxFetchSessions.get(sessionId); - if (fetchState != null) { + if (fetchState != null && fetchState.signalFetchTS.get() < now) { fetchState.hasMore.set(true); - fetchState.signalFetchTS.set(System.nanoTime()); + fetchState.signalFetchTS.set(now); fetch(fetchState); } } @@ -155,7 +155,9 @@ private void fetch(FetchState fetchState) { if (closed) { return; } - if (fetchState.fetching.compareAndSet(false, true)) { + if (fetchState.hasMore.get() + && fetchState.downStreamCapacity.get() > 0 + && fetchState.fetching.compareAndSet(false, true)) { long sessionId = fetchState.sessionId; String inboxId = fetchState.inboxId; long incarnation = fetchState.incarnation; diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java index 77c0d4280..9e7f73b62 100644 --- 
a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java @@ -24,6 +24,7 @@ import java.time.Duration; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -64,8 +65,8 @@ class InboxWriter implements InboxWriterPipeline.ISendRequestHandler { @Override public CompletableFuture handle(SendRequest request) { - Map> matchInfosByInbox = new HashMap<>(); - Map> subMsgPacksByInbox = new HashMap<>(); + Map> matchInfosByInbox = new LinkedHashMap<>(); + Map> subMsgPacksByInbox = new LinkedHashMap<>(); // break DeliveryPack into SubMessagePack by each TenantInboxInstance for (String tenantId : request.getRequest().getPackageMap().keySet()) { for (DeliveryPack pack : request.getRequest().getPackageMap().get(tenantId).getPackList()) { diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriterPipeline.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriterPipeline.java index 70cdf2f62..b7e879d26 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriterPipeline.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriterPipeline.java @@ -14,13 +14,19 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.server; import static org.apache.bifromq.inbox.util.PipelineUtil.PIPELINE_ATTR_KEY_DELIVERERKEY; +import io.grpc.stub.StreamObserver; +import java.time.Duration; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.baseenv.MemUsage; import org.apache.bifromq.baserpc.server.ResponsePipeline; import org.apache.bifromq.inbox.record.TenantInboxInstance; @@ -30,10 +36,6 @@ import org.apache.bifromq.sysprops.props.IngressSlowDownDirectMemoryUsage; import org.apache.bifromq.sysprops.props.IngressSlowDownHeapMemoryUsage; import org.apache.bifromq.sysprops.props.MaxSlowDownTimeoutSeconds; -import io.grpc.stub.StreamObserver; -import java.time.Duration; -import java.util.concurrent.CompletableFuture; -import lombok.extern.slf4j.Slf4j; @Slf4j class InboxWriterPipeline extends ResponsePipeline { @@ -43,6 +45,8 @@ class InboxWriterPipeline extends ResponsePipeline { private final IWriteCallback writeCallback; private final ISendRequestHandler handler; private final String delivererKey; + private final ConcurrentLinkedQueue tasks = new ConcurrentLinkedQueue<>(); + private final AtomicBoolean draining = new AtomicBoolean(false); public InboxWriterPipeline(IWriteCallback writeCallback, ISendRequestHandler handler, @@ -57,26 +61,96 @@ public InboxWriterPipeline(IWriteCallback writeCallback, @Override protected CompletableFuture handleRequest(String ignore, SendRequest request) { log.trace("Received inbox write request: deliverer={}, \n{}", delivererKey, request); + return submitAndTrigger(request); + } + + private CompletableFuture submitAndTrigger(SendRequest request) { + WriteTask task = new WriteTask(request); + tasks.add(task); + task.replyFuture.whenComplete((v, e) -> drain()); + bridge(doWrite(task.request), task.replyFuture); + return task.onDone; + } + + private void drain() { + // 
Drain in FIFO order without recursion + while (true) { + if (!draining.compareAndSet(false, true)) { + return; + } + try { + while (true) { + WriteTask head = tasks.peek(); + if (head == null) { + // nothing to emit + break; + } + if (!head.replyFuture.isDone()) { + // head not ready, wait for its completion callback to re-trigger drain + break; + } + // head is ready, emit and remove + tasks.poll(); + bridge(head.replyFuture, head.onDone); + } + } finally { + draining.set(false); + } + // loop to check again in case new head becomes ready right after releasing the flag + WriteTask head = tasks.peek(); + if (head == null || !head.replyFuture.isDone()) { + return; + } + } + } + + private void bridge(CompletableFuture from, CompletableFuture to) { + from.whenComplete((v, e) -> { + if (e != null) { + to.completeExceptionally(e); + } else { + to.complete(v); + } + }); + } + + private CompletableFuture doWrite(SendRequest request) { return handler.handle(request) .thenApply(v -> { - v.getReply().getResultMap() - .forEach((tenantId, deliveryResults) -> - deliveryResults.getResultList() - .forEach(result -> { - if (result.getCode() == DeliveryResult.Code.OK) { - writeCallback.afterWrite(TenantInboxInstance.from(tenantId, result.getMatchInfo()), - delivererKey); - } - })); + FetchSignalSender.INSTANCE.execute(() -> { + long now = System.nanoTime(); + v.getReply().getResultMap() + .forEach((tenantId, deliveryResults) -> + deliveryResults.getResultList() + .forEach(result -> { + if (result.getCode() == DeliveryResult.Code.OK) { + writeCallback.afterWrite( + TenantInboxInstance.from(tenantId, result.getMatchInfo()), + delivererKey, now); + } + })); + }); return v; }); } interface IWriteCallback { - void afterWrite(TenantInboxInstance tenantInboxInstance, String delivererKey); + void afterWrite(TenantInboxInstance tenantInboxInstance, String delivererKey, long now); } interface ISendRequestHandler { CompletableFuture handle(SendRequest request); } + + private static class 
WriteTask { + final SendRequest request; + final CompletableFuture replyFuture; + final CompletableFuture onDone; + + WriteTask(SendRequest request) { + this.request = request; + this.replyFuture = new CompletableFuture<>(); + this.onDone = new CompletableFuture<>(); + } + } } diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/BatchInsertCall.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/BatchInsertCall.java index 8d4f81acd..cc21046f6 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/BatchInsertCall.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/BatchInsertCall.java @@ -14,11 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.server.scheduler; +import java.util.HashSet; +import java.util.IdentityHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; +import java.util.Set; import org.apache.bifromq.basekv.client.IMutationPipeline; import org.apache.bifromq.basekv.client.scheduler.BatchMutationCall; import org.apache.bifromq.basekv.client.scheduler.MutationCallBatcherKey; @@ -31,9 +37,8 @@ import org.apache.bifromq.inbox.storage.proto.InboxServiceRWCoProcInput; import org.apache.bifromq.inbox.storage.proto.InsertRequest; import org.apache.bifromq.inbox.storage.proto.InsertResult; -import java.util.HashSet; -import java.util.Queue; -import java.util.Set; +import org.apache.bifromq.inbox.storage.proto.SubMessagePack; +import org.apache.bifromq.type.TopicMessagePack; class BatchInsertCall extends BatchMutationCall { protected BatchInsertCall(IMutationPipeline pipeline, MutationCallBatcherKey batcherKey) { @@ -49,7 +54,38 @@ protected MutationCallTaskBatch newBatch(long ver) protected RWCoProcInput makeBatch( Iterable> callTasks) { BatchInsertRequest.Builder reqBuilder = BatchInsertRequest.newBuilder(); - callTasks.forEach(call -> reqBuilder.addRequest(call.call())); + // legacy non-compact format for backward compatibility + // callTasks.forEach(call -> reqBuilder.addRequest(call.call())); + + // build message pool and insert references; + IdentityHashMap poolIndex = new IdentityHashMap<>(); + List pool = new LinkedList<>(); + List insertRefs = new LinkedList<>(); + + for (ICallTask call : callTasks) { + InsertRequest req = call.call(); + BatchInsertRequest.InsertRef.Builder refBuilder = BatchInsertRequest.InsertRef.newBuilder() + .setTenantId(req.getTenantId()) + .setInboxId(req.getInboxId()) + .setIncarnation(req.getIncarnation()); + for (SubMessagePack subPack : req.getMessagePackList()) { + TopicMessagePack msgPack = subPack.getMessages(); + Integer idx = poolIndex.get(msgPack); + if (idx == null) { + idx = pool.size(); 
+ poolIndex.put(msgPack, idx); + pool.add(msgPack); + } + BatchInsertRequest.SubRef.Builder subRef = BatchInsertRequest.SubRef.newBuilder() + .addAllMatchedRoute(subPack.getMatchedRouteList()) + .setMessagePackIndex(idx); + refBuilder.addSubRef(subRef.build()); + } + insertRefs.add(refBuilder.build()); + } + reqBuilder.addAllTopicMessagePack(pool); + reqBuilder.addAllInsertRef(insertRefs); + long reqId = System.nanoTime(); return RWCoProcInput.newBuilder() .setInboxService(InboxServiceRWCoProcInput.newBuilder() diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxCheckSubScheduler.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxCheckSubScheduler.java index cb39d2961..e19bc5e4b 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxCheckSubScheduler.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxCheckSubScheduler.java @@ -35,7 +35,7 @@ public InboxCheckSubScheduler(IBaseKVStoreClient inboxStoreClient) { @Override protected boolean isLinearizable(CheckMatchInfo request) { - return true; + return false; } @Override diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxExistScheduler.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxExistScheduler.java index b5a67a90f..415f8056e 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxExistScheduler.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxExistScheduler.java @@ -14,19 +14,19 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. 
+ * under the License. */ package org.apache.bifromq.inbox.server.scheduler; import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.inboxStartKeyPrefix; +import com.google.protobuf.ByteString; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.client.IBaseKVStoreClient; import org.apache.bifromq.inbox.rpc.proto.ExistReply; import org.apache.bifromq.inbox.rpc.proto.ExistRequest; import org.apache.bifromq.sysprops.props.InboxCheckQueuesPerRange; -import com.google.protobuf.ByteString; -import lombok.extern.slf4j.Slf4j; @Slf4j public class InboxExistScheduler extends InboxReadScheduler @@ -37,7 +37,7 @@ public InboxExistScheduler(IBaseKVStoreClient inboxStoreClient) { @Override protected boolean isLinearizable(ExistRequest request) { - return true; + return false; } @Override diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxFetchScheduler.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxFetchScheduler.java index 956eccccd..622054b1b 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxFetchScheduler.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxFetchScheduler.java @@ -14,19 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.server.scheduler; import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.inboxInstanceStartKey; +import com.google.common.hash.Hashing; +import com.google.protobuf.ByteString; +import java.nio.charset.StandardCharsets; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.client.IBaseKVStoreClient; import org.apache.bifromq.inbox.storage.proto.Fetched; import org.apache.bifromq.sysprops.props.InboxFetchQueuesPerRange; -import com.google.protobuf.ByteString; -import java.util.Objects; -import lombok.extern.slf4j.Slf4j; @Slf4j public class InboxFetchScheduler extends InboxReadScheduler @@ -37,16 +38,20 @@ public InboxFetchScheduler(IBaseKVStoreClient inboxStoreClient) { @Override protected int selectQueue(FetchRequest request) { - int idx = Objects.hash(request.tenantId(), request.inboxId(), request.incarnation()) % queuesPerRange; - if (idx < 0) { - idx += queuesPerRange; - } - return idx; + // use Murmur3_32 to improve distribution and reduce low-bit modulo bias + int hash = Hashing.murmur3_32_fixed() + .newHasher() + .putString(request.tenantId(), StandardCharsets.UTF_8) + .putString(request.inboxId(), StandardCharsets.UTF_8) + .putLong(request.incarnation()) + .hash() + .asInt(); + return Math.floorMod(hash, queuesPerRange); } @Override protected boolean isLinearizable(FetchRequest request) { - return true; + return false; } @Override diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxFetchStateScheduler.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxFetchStateScheduler.java index ce1595b61..55c52ea71 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxFetchStateScheduler.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxFetchStateScheduler.java @@ -39,7 +39,7 @@ public 
InboxFetchStateScheduler(IBaseKVStoreClient inboxStoreClient) { @Override protected boolean isLinearizable(InboxStateRequest request) { - return true; + return false; } @Override diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxReadScheduler.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxReadScheduler.java index 54daba5e9..898d01fd1 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxReadScheduler.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxReadScheduler.java @@ -14,19 +14,19 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.inbox.server.scheduler; +import com.google.common.base.Preconditions; +import java.time.Duration; +import java.util.concurrent.ThreadLocalRandom; import org.apache.bifromq.basekv.client.IBaseKVStoreClient; import org.apache.bifromq.basekv.client.scheduler.BatchQueryCall; import org.apache.bifromq.basekv.client.scheduler.IBatchQueryCallBuilder; import org.apache.bifromq.basekv.client.scheduler.QueryCallScheduler; import org.apache.bifromq.sysprops.props.DataPlaneMaxBurstLatencyMillis; -import com.google.common.base.Preconditions; -import java.time.Duration; -import java.util.concurrent.ThreadLocalRandom; public abstract class InboxReadScheduler> extends QueryCallScheduler { @@ -34,7 +34,7 @@ public abstract class InboxReadScheduler batchQueryCallBuilder, int queuesPerRange, IBaseKVStoreClient inboxStoreClient) { - super(batchQueryCallBuilder, Duration.ofSeconds(DataPlaneMaxBurstLatencyMillis.INSTANCE.get()).toNanos(), + super(batchQueryCallBuilder, 
Duration.ofMillis(DataPlaneMaxBurstLatencyMillis.INSTANCE.get()).toNanos(), inboxStoreClient); Preconditions.checkArgument(queuesPerRange > 0, "Queues per range must be positive"); this.queuesPerRange = queuesPerRange; diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxSendLWTScheduler.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxSendLWTScheduler.java index 0602e32cc..dd40ccb2d 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxSendLWTScheduler.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/scheduler/InboxSendLWTScheduler.java @@ -21,12 +21,12 @@ import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.inboxStartKeyPrefix; +import com.google.protobuf.ByteString; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.client.IBaseKVStoreClient; import org.apache.bifromq.inbox.rpc.proto.SendLWTReply; import org.apache.bifromq.inbox.rpc.proto.SendLWTRequest; import org.apache.bifromq.sysprops.props.InboxCheckQueuesPerRange; -import com.google.protobuf.ByteString; -import lombok.extern.slf4j.Slf4j; @Slf4j public class InboxSendLWTScheduler extends InboxReadScheduler @@ -37,7 +37,7 @@ public InboxSendLWTScheduler(IBaseKVStoreClient inboxStoreClient) { @Override protected boolean isLinearizable(SendLWTRequest request) { - return true; + return false; } @Override diff --git a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxExpiryTest.java b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxExpiryTest.java index 33ee10fed..8bc96796a 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxExpiryTest.java +++ b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxExpiryTest.java @@ -169,7 
+169,8 @@ public void lwtRetryOnError() { @Test(groups = "integration") public void lwtAfterDetach() { - clearInvocations(distClient); + // ensure no cross-retry or cross-case pollution + clearInvocations(eventCollector, retainClient, distClient); long now = HLC.INST.getPhysical(); long reqId = System.nanoTime(); String tenantId = "traffic-" + System.nanoTime(); @@ -202,12 +203,12 @@ public void lwtAfterDetach() { .setClient(clientInfo) .setNow(now) .build()).join(); - verify(distClient, timeout(2000).times(1)) + verify(distClient, timeout(4000).times(1)) .pub(anyLong(), eq(lwt.getTopic()), argThat(m -> m.getPubQoS() == QoS.AT_LEAST_ONCE && m.getPayload().equals(lwt.getMessage().getPayload())), any()); - verify(eventCollector, timeout(2000)).report(argThat(e -> e.type() == EventType.WILL_DISTED)); + verify(eventCollector, timeout(4000)).report(argThat(e -> e.type() == EventType.WILL_DISTED)); } @Test(groups = "integration") diff --git a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java index bf560d9d8..f28d418a6 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java +++ b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java @@ -26,6 +26,7 @@ import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.when; +import com.google.protobuf.Struct; import java.lang.reflect.Method; import java.time.Duration; import java.util.concurrent.CompletableFuture; @@ -38,7 +39,6 @@ import org.apache.bifromq.basecrdt.service.CRDTServiceOptions; import org.apache.bifromq.basecrdt.service.ICRDTService; import org.apache.bifromq.basekv.client.IBaseKVStoreClient; -import org.apache.bifromq.basekv.localengine.memory.InMemKVEngineConfigurator; import 
org.apache.bifromq.basekv.metaservice.IBaseKVMetaService; import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; import org.apache.bifromq.basekv.utils.BoundaryUtil; @@ -119,8 +119,11 @@ public void setup() { inboxClient = IInboxClient.newBuilder().trafficService(trafficService).build(); KVRangeStoreOptions kvRangeStoreOptions = new KVRangeStoreOptions(); - kvRangeStoreOptions.setDataEngineConfigurator(new InMemKVEngineConfigurator()); - kvRangeStoreOptions.setWalEngineConfigurator(new InMemKVEngineConfigurator()); + Struct memConf = Struct.newBuilder().build(); + kvRangeStoreOptions.setDataEngineType("memory"); + kvRangeStoreOptions.setDataEngineConf(memConf); + kvRangeStoreOptions.setWalEngineType("memory"); + kvRangeStoreOptions.setWalEngineConf(memConf); bgTaskExecutor = Executors.newSingleThreadScheduledExecutor(); inboxStoreClient = IBaseKVStoreClient.newBuilder() .clusterId(IInboxStore.CLUSTER_NAME) diff --git a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterPipelineTest.java b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterPipelineTest.java index 8925a25ab..b731d06c9 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterPipelineTest.java +++ b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterPipelineTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.server; @@ -22,10 +22,22 @@ import static org.apache.bifromq.inbox.server.Fixtures.matchInfo; import static org.apache.bifromq.inbox.server.Fixtures.sendRequest; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import io.grpc.Context; +import io.grpc.stub.ServerCallStreamObserver; +import io.micrometer.core.instrument.Timer; +import io.micrometer.core.instrument.simple.SimpleMeterRegistry; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.baseenv.MemUsage; import org.apache.bifromq.baserpc.RPCContext; import org.apache.bifromq.baserpc.metrics.IRPCMeter; @@ -38,15 +50,6 @@ import org.apache.bifromq.plugin.subbroker.DeliveryResults; import org.apache.bifromq.sysprops.props.IngressSlowDownDirectMemoryUsage; import org.apache.bifromq.sysprops.props.IngressSlowDownHeapMemoryUsage; -import io.grpc.Context; -import io.grpc.stub.ServerCallStreamObserver; -import io.micrometer.core.instrument.Timer; -import io.micrometer.core.instrument.simple.SimpleMeterRegistry; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import lombok.SneakyThrows; -import lombok.extern.slf4j.Slf4j; import org.mockito.Mock; import org.mockito.MockedStatic; import org.mockito.Mockito; @@ -123,7 +126,7 @@ public void handleRequestDeliveryError() { SendReply mockSendReply = SendReply.newBuilder().setReqId(1) .setReply(DeliveryReply.newBuilder().setCode(DeliveryReply.Code.ERROR).build()).build(); when(inboxWriter.handle(any())).thenReturn(CompletableFuture.completedFuture(mockSendReply)); - 
doNothing().when(fetcherSignaler).afterWrite(any(), any()); + doNothing().when(fetcherSignaler).afterWrite(any(), any(), anyLong()); InboxWriterPipeline writerPipeline = new InboxWriterPipeline(fetcherSignaler, inboxWriter, responseObserver); SendReply sendReply = writerPipeline.handleRequest("_", sendRequest()).join(); assertEquals(sendReply, mockSendReply); @@ -132,7 +135,7 @@ public void handleRequestDeliveryError() { private void testHandleRequest(DeliveryResult.Code code) { SendReply mockSendReply = createSendReply(code); when(inboxWriter.handle(any())).thenReturn(CompletableFuture.completedFuture(mockSendReply)); - doNothing().when(fetcherSignaler).afterWrite(any(), any()); + doNothing().when(fetcherSignaler).afterWrite(any(), any(), anyLong()); InboxWriterPipeline writerPipeline = new InboxWriterPipeline(fetcherSignaler, inboxWriter, responseObserver); SendReply sendReply = writerPipeline.handleRequest("_", sendRequest()).join(); assertEquals(sendReply, mockSendReply); @@ -168,7 +171,7 @@ public void testConstructorHeapMemoryUsageAllNotCatch() { private void testMemoryUsageThresholdExceed() { when(inboxWriter.handle(any())).thenReturn(CompletableFuture.completedFuture(SendReply.getDefaultInstance())); - doNothing().when(fetcherSignaler).afterWrite(any(), any()); + doNothing().when(fetcherSignaler).afterWrite(any(), any(), anyLong()); try (MockedStatic mocked = Mockito.mockStatic(MemUsage.class)) { mocked.when(MemUsage::local).thenReturn(memUsage); SendRequest sendRequest = SendRequest.getDefaultInstance(); @@ -180,4 +183,31 @@ private void testMemoryUsageThresholdExceed() { } } -} \ No newline at end of file + @Test + public void testFIFOResponseOrder() throws Exception { + CompletableFuture f1 = new CompletableFuture<>(); + CompletableFuture f2 = new CompletableFuture<>(); + + when(inboxWriter.handle(any())).thenReturn(f1).thenReturn(f2); + doNothing().when(fetcherSignaler).afterWrite(any(), any(), anyLong()); + + InboxWriterPipeline writerPipeline = new 
InboxWriterPipeline(fetcherSignaler, inboxWriter, responseObserver); + + CompletableFuture r1 = writerPipeline.handleRequest("_", sendRequest()); + CompletableFuture r2 = writerPipeline.handleRequest("_", sendRequest()); + + SendReply reply2 = SendReply.newBuilder().setReqId(2) + .setReply(DeliveryReply.newBuilder().setCode(DeliveryReply.Code.OK).build()).build(); + f2.complete(reply2); + + assertFalse(r2.isDone()); + + SendReply reply1 = SendReply.newBuilder().setReqId(1) + .setReply(DeliveryReply.newBuilder().setCode(DeliveryReply.Code.OK).build()).build(); + f1.complete(reply1); + + assertEquals(reply1, r1.get(3, TimeUnit.SECONDS)); + assertEquals(reply2, r2.get(3, TimeUnit.SECONDS)); + } + +} diff --git a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStore.java b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStore.java index 1e8a0452f..b8794178a 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStore.java +++ b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStore.java @@ -77,7 +77,6 @@ class InboxStore implements IInboxStore { builder.resourceThrottler, builder.detachTimeout, builder.metaCacheExpireTime, - builder.loadEstimateWindow, builder.expireRateLimit); Map loadedFactories = BaseHookLoader.load(IInboxStoreBalancerFactory.class); for (String factoryName : builder.balancerFactoryConfig.keySet()) { diff --git a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreBuilder.java b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreBuilder.java index fa4345a7e..fc3bd8552 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreBuilder.java +++ b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreBuilder.java @@ -72,7 +72,6 @@ public class 
InboxStoreBuilder { Map balancerFactoryConfig = new HashMap<>(); Duration detachTimeout = Duration.ofSeconds(PersistentSessionDetachTimeoutSecond.INSTANCE.get()); Duration metaCacheExpireTime = Duration.ofSeconds(InboxMetaCacheExpirySeconds.INSTANCE.get()); - Duration loadEstimateWindow = Duration.ofSeconds(5); int expireRateLimit = 1000; Duration gcInterval = Duration.ofMinutes(5); Map attributes = new HashMap<>(); diff --git a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java index c3dadae0b..53a3abb39 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java +++ b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java @@ -22,7 +22,6 @@ import static java.time.Duration.ofSeconds; import static org.apache.bifromq.basekv.utils.BoundaryUtil.upperBound; import static org.apache.bifromq.inbox.store.canon.TenantIdCanon.TENANT_ID_INTERNER; -import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.bufferedMsgKey; import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.inboxInstanceStartKey; import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.inboxStartKeyPrefix; import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.isInboxInstanceKey; @@ -30,7 +29,6 @@ import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.parseInboxInstanceStartKeyPrefix; import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.parseSeq; import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.parseTenantId; -import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.qos0MsgKey; import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.qos0QueuePrefix; import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.sendBufferPrefix; import static 
org.apache.bifromq.inbox.store.schema.KVSchemaUtil.tenantBeginKeyPrefix; @@ -69,10 +67,10 @@ import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVIterator; import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; -import org.apache.bifromq.basekv.store.api.IKVReader; +import org.apache.bifromq.basekv.store.api.IKVRangeReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.basekv.store.api.IKVWriter; import org.apache.bifromq.basekv.store.proto.ROCoProcInput; import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; @@ -186,7 +184,7 @@ final class InboxStoreCoProc implements IKVRangeCoProc { ISettingProvider settingProvider, IEventCollector eventCollector, IResourceThrottler resourceThrottler, - Supplier rangeReaderProvider, + Supplier rangeReaderProvider, Duration detachTimeout, Duration metaCacheExpireTime, int expireRateLimit) { @@ -226,7 +224,7 @@ private static int getExpireSeconds(Duration expireTime, InboxMetadata latestInb } @Override - public CompletableFuture query(ROCoProcInput input, IKVReader reader) { + public CompletableFuture query(ROCoProcInput input, IKVRangeReader reader) { try { InboxServiceROCoProcInput coProcInput = input.getInboxService(); InboxServiceROCoProcOutput.Builder outputBuilder = InboxServiceROCoProcOutput.newBuilder() @@ -256,7 +254,8 @@ public CompletableFuture query(ROCoProcInput input, IKVReader re } @Override - public Supplier mutate(RWCoProcInput input, IKVReader reader, IKVWriter writer, boolean isLeader) { + public Supplier mutate(RWCoProcInput input, IKVRangeReader reader, IKVWriter writer, + boolean isLeader) { InboxServiceRWCoProcInput coProcInput = input.getInboxService(); InboxServiceRWCoProcOutput.Builder outputBuilder = InboxServiceRWCoProcOutput.newBuilder() 
.setReqId(coProcInput.getReqId()); @@ -327,20 +326,16 @@ public void close() { delayTaskRunner.shutdown(); } - private CompletableFuture batchExist(BatchExistRequest request, IKVReader reader) { - reader.refresh(); - IKVIterator itr = reader.iterator(); + private CompletableFuture batchExist(BatchExistRequest request, IKVRangeReader reader) { BatchExistReply.Builder replyBuilder = BatchExistReply.newBuilder(); for (BatchExistRequest.Params params : request.getParamsList()) { - Optional latest = getLatestInboxVersion(params.getTenantId(), params.getInboxId(), reader, - itr); - + Optional latest = getLatestInboxVersion(params.getTenantId(), params.getInboxId(), reader); replyBuilder.addExist(latest.isPresent() && !hasExpired(latest.get(), params.getNow())); } return CompletableFuture.completedFuture(replyBuilder.build()); } - private CompletableFuture batchCheckSub(BatchCheckSubRequest request, IKVReader reader) { + private CompletableFuture batchCheckSub(BatchCheckSubRequest request, IKVRangeReader reader) { BatchCheckSubReply.Builder replyBuilder = BatchCheckSubReply.newBuilder(); for (BatchCheckSubRequest.Params params : request.getParamsList()) { Optional metadataOpt = inboxMetaCache.get(params.getTenantId(), params.getInboxId(), @@ -364,9 +359,7 @@ private CompletableFuture batchCheckSub(BatchCheckSubRequest return CompletableFuture.completedFuture(replyBuilder.build()); } - private CompletableFuture batchFetch(BatchFetchRequest request, IKVReader reader) { - reader.refresh(); - IKVIterator itr = reader.iterator(); + private CompletableFuture batchFetch(BatchFetchRequest request, IKVRangeReader reader) { BatchFetchReply.Builder replyBuilder = BatchFetchReply.newBuilder(); for (BatchFetchRequest.Params params : request.getParamsList()) { replyBuilder.addResult(fetch(params, reader)); @@ -374,10 +367,10 @@ private CompletableFuture batchFetch(BatchFetchRequest request, return CompletableFuture.completedFuture(replyBuilder.build()); } - private Fetched 
fetch(BatchFetchRequest.Params params, IKVReader reader) { + private Fetched fetch(BatchFetchRequest.Params params, IKVRangeReader reader) { Fetched.Builder replyBuilder = Fetched.newBuilder(); Optional inboxMetadataOpt = inboxMetaCache.get(params.getTenantId(), params.getInboxId(), - params.getIncarnation(), this.inboxMetadataProvider(reader)); + params.getIncarnation(), inboxMetadataProvider(reader)); if (inboxMetadataOpt.isEmpty()) { replyBuilder.setResult(Fetched.Result.NO_INBOX); return replyBuilder.build(); @@ -407,45 +400,58 @@ private void fetchFromInbox(ByteString inboxInstStartKey, long nextSeq, BiFunction keyGenerator, BiConsumer messageConsumer, - IKVReader reader, + IKVRangeReader reader, Fetched.Builder replyBuilder) { if (startFetchFromSeq < nextSeq) { - while (startSeq < nextSeq && fetchCount > 0) { - ByteString startKey = keyGenerator.apply(inboxInstStartKey, startSeq); - Optional msgListData = reader.get(startKey); - // the startSeq may not reflect the latest seq of the first message when query is non-linearized + // locate the first record to scan + long currSeq = startSeq; + Optional currData = Optional.empty(); + if (startFetchFromSeq > startSeq) { + Optional pointed = reader.get(keyGenerator.apply(inboxInstStartKey, startFetchFromSeq)); + if (pointed.isPresent()) { + currSeq = startFetchFromSeq; // jump to next chunk directly + currData = pointed; // use pointed chunk as first record + } + } + if (currData.isEmpty()) { + // find first message chunk + currData = reader.get(keyGenerator.apply(inboxInstStartKey, currSeq)); + // the currSeq may not reflect the latest seq of the first message when query is non-linearized // it may point to the message was committed. 
- if (msgListData.isEmpty()) { - startSeq++; - continue; + while (currData.isEmpty() && currSeq < nextSeq) { + currSeq++; + currData = reader.get(keyGenerator.apply(inboxInstStartKey, currSeq)); } - List messageList = ZeroCopyParser.parse(msgListData.get(), InboxMessageList.parser()) - .getMessageList(); + // if current record not exists, nothing to scan + if (currData.isEmpty()) { + return; + } + } + // scan forward from located record + while (currData.isPresent() && fetchCount > 0) { + List messageList = ZeroCopyParser.parse(currData.get(), + InboxMessageList.parser()).getMessageList(); long lastSeq = messageList.get(messageList.size() - 1).getSeq(); if (lastSeq >= startFetchFromSeq) { for (InboxMessage inboxMsg : messageList) { if (inboxMsg.getSeq() >= startFetchFromSeq) { messageConsumer.accept(replyBuilder, inboxMsg); fetchCount--; - if (fetchCount == 0) { - break; - } + // keep returning messages in current chunk even if exceeded fetchCount } } } - startSeq = lastSeq + 1; + currSeq = lastSeq + 1; + currData = reader.get(keyGenerator.apply(inboxInstStartKey, currSeq)); } } } private CompletableFuture batchFetchInboxState(BatchFetchInboxStateRequest request, - IKVReader reader) { - reader.refresh(); - IKVIterator itr = reader.iterator(); + IKVRangeReader reader) { BatchFetchInboxStateReply.Builder replyBuilder = BatchFetchInboxStateReply.newBuilder(); for (BatchFetchInboxStateRequest.Params params : request.getParamsList()) { - Optional latest = getLatestInboxVersion(params.getTenantId(), params.getInboxId(), reader, - itr); + Optional latest = getLatestInboxVersion(params.getTenantId(), params.getInboxId(), reader); if (latest.isEmpty()) { replyBuilder.addResult(BatchFetchInboxStateReply.Result.newBuilder() .setCode(BatchFetchInboxStateReply.Result.Code.NO_INBOX) @@ -495,7 +501,7 @@ private InboxState toInboxState(InboxMetadata metadata) { return stateBuilder.build(); } - private CompletableFuture batchSendLWT(BatchSendLWTRequest request, IKVReader reader) { 
+ private CompletableFuture batchSendLWT(BatchSendLWTRequest request, IKVRangeReader reader) { List> sendLWTFutures = new ArrayList<>(request.getParamsCount()); for (BatchSendLWTRequest.Params params : request.getParamsList()) { Optional metadataOpt = inboxMetaCache.get(params.getTenantId(), params.getInboxId(), @@ -697,10 +703,8 @@ private CompletableFuture retain(long reqId, LWT lwt, Client private Runnable batchAttach(BatchAttachRequest request, BatchAttachReply.Builder replyBuilder, boolean isLeader, - IKVReader reader, + IKVRangeReader reader, IKVWriter writer) { - reader.refresh(); - IKVIterator itr = reader.iterator(); Map> toBeTracked = new HashMap<>(); Set toBeCanceled = new HashSet<>(); Map> toBeEnsured = new HashMap<>(); @@ -708,7 +712,7 @@ private Runnable batchAttach(BatchAttachRequest request, String tenantId = params.getClient().getTenantId(); String inboxId = params.getInboxId(); long now = params.getNow(); - SortedMap inboxInstances = getAllInboxVersions(tenantId, inboxId, itr); + SortedMap inboxInstances = getAllInboxVersions(tenantId, inboxId, reader); if (inboxInstances.isEmpty() || hasExpired(inboxInstances.get(inboxInstances.lastKey()), now)) { long incarnation = params.getIncarnation(); ByteString metadataKey = inboxInstanceStartKey(tenantId, inboxId, incarnation); @@ -811,16 +815,14 @@ private Runnable batchAttach(BatchAttachRequest request, private Runnable batchDetach(BatchDetachRequest request, BatchDetachReply.Builder replyBuilder, boolean isLeader, - IKVReader reader, + IKVRangeReader reader, IKVWriter writer) { - reader.refresh(); - IKVIterator itr = reader.iterator(); Map> toBeUpdated = new HashMap<>(); Map> toBeEnsured = new HashMap<>(); Map> toBeScheduled = new HashMap<>(); for (BatchDetachRequest.Params params : request.getParamsList()) { SortedMap inboxVersions = getAllInboxVersions(params.getTenantId(), - params.getInboxId(), itr); + params.getInboxId(), reader); if (inboxVersions.isEmpty()) { 
replyBuilder.addCode(BatchDetachReply.Code.NO_INBOX); continue; @@ -911,11 +913,9 @@ private Runnable batchDetach(BatchDetachRequest request, private Runnable batchDelete(BatchDeleteRequest request, BatchDeleteReply.Builder replyBuilder, boolean isLeader, - IKVReader reader, + IKVRangeReader reader, IKVWriter writer) { Map> toBeRemoved = new HashMap<>(); - reader.refresh(); - IKVIterator itr = reader.iterator(); Map> dropedQoS0Msgs = new HashMap<>(); Map> dropedBufferedMsg = new HashMap<>(); for (BatchDeleteRequest.Params params : request.getParamsList()) { @@ -932,12 +932,13 @@ private Runnable batchDelete(BatchDeleteRequest request, continue; } InboxMetadata metadata = metadataOpt.get(); - Optional latestMetadata = getLatestInboxVersion(params.getTenantId(), params.getInboxId(), - reader, itr); + Optional latestMetadata = getLatestInboxVersion(params.getTenantId(), + params.getInboxId(), + reader); if (latestMetadata.isEmpty()) { log.warn("Inconsistent state: inbox instance disappeared during deletion"); } - clearInboxInstance(metadata, itr, reader, writer, isLeader, + clearInboxInstance(metadata, reader, writer, isLeader, dropedQoS0Msgs.computeIfAbsent(metadata, k -> new LinkedList<>()), dropedBufferedMsg.computeIfAbsent(metadata, k -> new LinkedList<>())); toBeRemoved.computeIfAbsent(params.getTenantId(), k -> new HashMap<>()) @@ -954,8 +955,9 @@ private Runnable batchDelete(BatchDeleteRequest request, Message msg = topicMsg.getMessage(); for (String topicFilter : inboxMsg.getMatchedTopicFilterMap().keySet()) { TopicFilterOption option = inboxMsg.getMatchedTopicFilterMap().get(topicFilter); - boolean isRetain = topicMsg.getMessage().getIsRetained() || option.getRetainAsPublished() - && msg.getIsRetain(); + boolean isRetain = + topicMsg.getMessage().getIsRetained() || option.getRetainAsPublished() + && msg.getIsRetain(); eventCollector.report(getLocal(QoS0Dropped.class) .reason(DropReason.SessionClosed) .reqId(msg.getMessageId()) @@ -977,8 +979,9 @@ private 
Runnable batchDelete(BatchDeleteRequest request, TopicFilterOption option = inboxMsg.getMatchedTopicFilterMap().get(topicFilter); QoS finalQos = QoS.forNumber(Math.min(topicMsg.getMessage().getPubQoS().getNumber(), option.getQos().getNumber())); - boolean isRetain = topicMsg.getMessage().getIsRetained() || option.getRetainAsPublished() - && msg.getIsRetain(); + boolean isRetain = + topicMsg.getMessage().getIsRetained() || option.getRetainAsPublished() + && msg.getIsRetain(); if (finalQos == QoS.AT_LEAST_ONCE) { eventCollector.report(getLocal(QoS1Dropped.class) .reason(DropReason.SessionClosed) @@ -1011,7 +1014,7 @@ private Runnable batchDelete(BatchDeleteRequest request, private Runnable batchSub(BatchSubRequest request, BatchSubReply.Builder replyBuilder, boolean isLeader, - IKVReader reader, + IKVRangeReader reader, IKVWriter writer) { Map> toBeCached = new HashMap<>(); Map addedSubCounts = new HashMap<>(); @@ -1059,7 +1062,7 @@ private Runnable batchSub(BatchSubRequest request, private Runnable batchUnsub(BatchUnsubRequest request, BatchUnsubReply.Builder replyBuilder, boolean isLeader, - IKVReader reader, + IKVRangeReader reader, IKVWriter write) { Map> toBeCached = new HashMap<>(); Map removedSubCounts = new HashMap<>(); @@ -1101,55 +1104,49 @@ private Runnable batchUnsub(BatchUnsubRequest request, } private void clearInboxInstance(InboxMetadata metadata, - IKVIterator itr, - IKVReader reader, + IKVRangeReader reader, IKVWriter writer, boolean isLeader, List dropedQoS0MsgList, List dropedBufferedMsgList) { ByteString startKey = inboxInstanceStartKey(metadata.getClient().getTenantId(), metadata.getInboxId(), metadata.getIncarnation()); + // iterate by existing chunk keys and collect drop lists on leader if (metadata.getQos0NextSeq() > 0) { - // find lowest seq of qos0 message - itr.seek(qos0QueuePrefix(startKey)); - if (itr.isValid() && itr.key().startsWith(startKey)) { - for (long s = parseSeq(startKey, itr.key()); s < metadata.getQos0NextSeq(); s++) { - 
ByteString qos0MsgKey = qos0MsgKey(startKey, s); - if (isLeader) { - Optional inboxMsgListBytes = reader.get(qos0MsgKey); - if (inboxMsgListBytes.isEmpty()) { - log.warn( - "Inconsistent state: empty qos0 msg list: tenantId={}, inboxId={}, incar={}, seq={}", - metadata.getClient().getTenantId(), metadata.getInboxId(), metadata.getIncarnation(), - s); - continue; - } - dropedQoS0MsgList.addAll(parseInboxMessageList(inboxMsgListBytes.get()).getMessageList()); + ByteString qos0Prefix = qos0QueuePrefix(startKey); + Boundary qos0MsgBound = Boundary.newBuilder() + .setStartKey(qos0Prefix) + .setEndKey(upperBound(qos0Prefix)) + .build(); + if (isLeader) { + try (IKVIterator it = reader.iterator(qos0MsgBound)) { + for (it.seek(qos0Prefix); it.isValid() && it.key().startsWith(qos0Prefix); it.next()) { + // parse once per chunk key + dropedQoS0MsgList.addAll(parseInboxMessageList(it.value()).getMessageList()); } - writer.delete(qos0MsgKey); } } + // clear the whole qos0 queue range in one shot + writer.clear(Boundary.newBuilder().setStartKey(qos0Prefix).setEndKey(upperBound(qos0Prefix)).build()); } if (metadata.getSendBufferNextSeq() > 0) { - itr.seek(sendBufferPrefix(startKey)); - if (itr.isValid() && itr.key().startsWith(startKey)) { - for (long s = parseSeq(startKey, itr.key()); s < metadata.getSendBufferNextSeq(); s++) { - if (isLeader) { - ByteString bufferedMsgKey = bufferedMsgKey(startKey, s); - Optional inboxMsgListBytes = reader.get(bufferedMsgKey); - if (inboxMsgListBytes.isEmpty()) { - log.warn( - "Inconsistent state: empty buffer msg list: tenantId={}, inboxId={}, incar={}, seq={}", - metadata.getClient().getTenantId(), metadata.getInboxId(), metadata.getIncarnation(), - s); - continue; - } - dropedBufferedMsgList.addAll(parseInboxMessageList(inboxMsgListBytes.get()).getMessageList()); + ByteString bufPrefix = sendBufferPrefix(startKey); + Boundary bufferedMsgBound = Boundary.newBuilder() + .setStartKey(bufPrefix) + .setEndKey(upperBound(bufPrefix)) + .build(); + 
if (isLeader) { + try (IKVIterator it = reader.iterator(bufferedMsgBound)) { + for (it.seek(bufPrefix); it.isValid() && it.key().startsWith(bufPrefix); it.next()) { + // parse once per chunk key + dropedBufferedMsgList.addAll(parseInboxMessageList(it.value()).getMessageList()); } - writer.delete(bufferedMsgKey(startKey, s)); } } + // clear the whole buffered queue range in one shot + writer.clear(bufferedMsgBound); } + // finally remove the inbox instance start key which points to metadata writer.delete(startKey); } @@ -1163,26 +1160,169 @@ private InboxMessageList parseInboxMessageList(ByteString value) { } @SneakyThrows - private CompletableFuture gc(GCRequest request, IKVReader reader) { - List> expireFutures = new LinkedList<>(); - reader.refresh(); - IKVIterator itr = reader.iterator(); - for (itr.seekToFirst(); itr.isValid(); ) { - String tenantId = parseTenantId(itr.key()); - expireFutures.add(expireTenant(tenantId, Duration.ZERO, request.getNow(), itr)); - itr.seek(upperBound(tenantBeginKeyPrefix(tenantId))); + private CompletableFuture gc(GCRequest request, IKVRangeReader reader) { + try (IKVIterator itr = reader.iterator()) { + List> expireFutures = new LinkedList<>(); + for (itr.seekToFirst(); itr.isValid(); ) { + String tenantId = parseTenantId(itr.key()); + expireFutures.add(expireTenant(tenantId, Duration.ZERO, request.getNow(), reader)); + itr.seek(upperBound(tenantBeginKeyPrefix(tenantId))); + } + return CompletableFuture.allOf(expireFutures.toArray(CompletableFuture[]::new)) + .thenApply(v -> GCReply.newBuilder().build()); } - return CompletableFuture.allOf(expireFutures.toArray(CompletableFuture[]::new)) - .thenApply(v -> GCReply.newBuilder().build()); } private Runnable batchInsert(BatchInsertRequest request, BatchInsertReply.Builder replyBuilder, boolean isLeader, - IKVReader reader, + IKVRangeReader reader, IKVWriter writer) { - reader.refresh(); - IKVIterator itr = reader.iterator(); + // route to new or legacy implementation for better 
readability + if (request.getInsertRefCount() > 0) { + return batchInsertCompactLayout(request, replyBuilder, isLeader, reader, writer); + } + return batchInsertLegacy(request, replyBuilder, isLeader, reader, writer); + } + + // handle insert with message pool and explicit references + private Runnable batchInsertCompactLayout(BatchInsertRequest request, + BatchInsertReply.Builder replyBuilder, + boolean isLeader, + IKVRangeReader reader, + IKVWriter writer) { + Map> toBeCached = new HashMap<>(); + Map> dropCountMap = new HashMap<>(); + Map dropOldestMap = new HashMap<>(); + List pool = request.getTopicMessagePackList(); + for (BatchInsertRequest.InsertRef ref : request.getInsertRefList()) { + Optional metadataOpt = inboxMetaCache.get(ref.getTenantId(), ref.getInboxId(), + ref.getIncarnation(), this.inboxMetadataProvider(reader)); + if (metadataOpt.isEmpty()) { + replyBuilder.addResult(InsertResult.newBuilder().setCode(InsertResult.Code.NO_INBOX).build()); + continue; + } + InboxMetadata metadata = metadataOpt.get(); + List qos0MsgList = new ArrayList<>(); + List bufferMsgList = new ArrayList<>(); + Set insertResults = new HashSet<>(); + for (BatchInsertRequest.SubRef subRef : ref.getSubRefList()) { + int index = subRef.getMessagePackIndex(); + if (index < 0 || index >= pool.size()) { + log.warn("Invalid messagePackIndex: {} for tenantId={}, inboxId={}, inc={}", + index, ref.getTenantId(), ref.getInboxId(), ref.getIncarnation()); + continue; + } + TopicMessagePack topicMsgPack = pool.get(index); + Map qos0TopicFilters = new HashMap<>(); + Map qos1TopicFilters = new HashMap<>(); + Map qos2TopicFilters = new HashMap<>(); + for (MatchedRoute matchedRoute : subRef.getMatchedRouteList()) { + long matchedIncarnation = matchedRoute.getIncarnation(); + TopicFilterOption tfOption = metadata.getTopicFiltersMap().get(matchedRoute.getTopicFilter()); + if (tfOption == null) { + insertResults.add(InsertResult.SubStatus.newBuilder() + .setMatchedRoute(matchedRoute) + 
.setRejected(true) + .build()); + } else { + if (tfOption.getIncarnation() > matchedIncarnation) { + log.debug( + "Receive message from previous subscription: topicFilter={}, inc={}, prevInc={}", + matchedRoute, tfOption.getIncarnation(), matchedIncarnation); + insertResults.add(InsertResult.SubStatus.newBuilder() + .setMatchedRoute(matchedRoute) + .setRejected(true) + .build()); + } else { + insertResults.add(InsertResult.SubStatus.newBuilder() + .setMatchedRoute(matchedRoute) + .setRejected(false) + .build()); + } + switch (tfOption.getQos()) { + case AT_MOST_ONCE -> qos0TopicFilters.put(matchedRoute.getTopicFilter(), tfOption); + case AT_LEAST_ONCE -> qos1TopicFilters.put(matchedRoute.getTopicFilter(), tfOption); + case EXACTLY_ONCE -> qos2TopicFilters.put(matchedRoute.getTopicFilter(), tfOption); + default -> { + // never happens + } + } + } + } + if (qos0TopicFilters.isEmpty() && qos1TopicFilters.isEmpty() && qos2TopicFilters.isEmpty()) { + continue; + } + String topic = topicMsgPack.getTopic(); + for (TopicMessagePack.PublisherPack publisherPack : topicMsgPack.getMessageList()) { + for (Message message : publisherPack.getMessageList()) { + ClientInfo publisher = publisherPack.getPublisher(); + switch (message.getPubQoS()) { + case AT_MOST_ONCE -> { + Map topicFilters = new HashMap<>(); + topicFilters.putAll(qos0TopicFilters); + topicFilters.putAll(qos1TopicFilters); + topicFilters.putAll(qos2TopicFilters); + qos0MsgList.add(new SubMessage(topic, publisher, message, topicFilters)); + } + case AT_LEAST_ONCE, EXACTLY_ONCE -> { + if (!qos0TopicFilters.isEmpty()) { + qos0MsgList.add(new SubMessage(topic, publisher, message, qos0TopicFilters)); + } + if (!qos1TopicFilters.isEmpty() || !qos2TopicFilters.isEmpty()) { + Map topicFilters = new HashMap<>(); + topicFilters.putAll(qos1TopicFilters); + topicFilters.putAll(qos2TopicFilters); + bufferMsgList.add(new SubMessage(topic, publisher, message, topicFilters)); + } + } + default -> { + // never happens + } + } + } + 
} + } + + InboxMetadata.Builder metadataBuilder = metadata.toBuilder(); + dropOldestMap.put(metadata.getClient(), metadata.getDropOldest()); + ByteString inboxInstStartKey = inboxInstanceStartKey(ref.getTenantId(), ref.getInboxId(), + ref.getIncarnation()); + Map dropCounts = insertInbox(inboxInstStartKey, qos0MsgList, bufferMsgList, + metadataBuilder, reader, writer); + metadata = metadataBuilder.build(); + + Map aggregated = dropCountMap.computeIfAbsent(metadata.getClient(), k -> new HashMap<>()); + dropCounts.forEach((qos, count) -> aggregated.compute(qos, (k, v) -> v == null ? count : v + count)); + + replyBuilder.addResult(InsertResult.newBuilder() + .setCode(InsertResult.Code.OK) + .addAllResult(insertResults) + .build()); + + writer.put(inboxInstStartKey, metadata.toByteString()); + toBeCached.computeIfAbsent(ref.getTenantId(), k -> new HashMap<>()).put(metadata, false); + } + return () -> { + updateTenantStates(toBeCached, isLeader); + dropCountMap.forEach((client, dropCounts) -> dropCounts.forEach((qos, count) -> { + if (count > 0) { + eventCollector.report(getLocal(Overflowed.class) + .oldest(dropOldestMap.get(client)) + .isQoS0(qos == QoS.AT_MOST_ONCE) + .clientInfo(client) + .dropCount(count)); + } + })); + }; + } + + // handle legacy format with embedded SubMessagePack per InsertRequest + private Runnable batchInsertLegacy(BatchInsertRequest request, + BatchInsertReply.Builder replyBuilder, + boolean isLeader, + IKVRangeReader reader, + IKVWriter writer) { Map> toBeCached = new HashMap<>(); Map> dropCountMap = new HashMap<>(); Map dropOldestMap = new HashMap<>(); @@ -1213,7 +1353,8 @@ private Runnable batchInsert(BatchInsertRequest request, } else { if (tfOption.getIncarnation() > matchedIncarnation) { // messages from old sub incarnation - log.debug("Receive message from previous subscription: topicFilter={}, inc={}, prevInc={}", + log.debug( + "Receive message from previous subscription: topicFilter={}, inc={}, prevInc={}", matchedRoute, 
tfOption.getIncarnation(), matchedIncarnation); insertResults.add(InsertResult.SubStatus.newBuilder() .setMatchedRoute(matchedRoute) @@ -1277,8 +1418,9 @@ private Runnable batchInsert(BatchInsertRequest request, dropOldestMap.put(metadata.getClient(), metadata.getDropOldest()); ByteString inboxInstStartKey = inboxInstanceStartKey(params.getTenantId(), params.getInboxId(), params.getIncarnation()); - Map dropCounts = insertInbox(inboxInstStartKey, qos0MsgList, bufferMsgList, metadataBuilder, - itr, writer); + Map dropCounts = insertInbox(inboxInstStartKey, qos0MsgList, bufferMsgList, + metadataBuilder, + reader, writer); metadata = metadataBuilder.build(); Map aggregated = dropCountMap.computeIfAbsent(metadata.getClient(), k -> new HashMap<>()); @@ -1316,7 +1458,7 @@ private Map insertInbox(ByteString inboxKeyPrefix, List qos0MsgList, List bufferedMsgList, InboxMetadata.Builder metaBuilder, - IKVIterator itr, + IKVRangeReader reader, IKVWriter writer) { Map dropCounts = new HashMap<>(); if (!qos0MsgList.isEmpty()) { @@ -1324,7 +1466,7 @@ private Map insertInbox(ByteString inboxKeyPrefix, long nextSeq = metaBuilder.getQos0NextSeq(); int dropCount = insertToInbox(inboxKeyPrefix, startSeq, nextSeq, metaBuilder.getLimit(), metaBuilder.getDropOldest(), KVSchemaUtil::qos0MsgKey, metaBuilder::setQos0StartSeq, - metaBuilder::setQos0NextSeq, qos0MsgList, itr, writer); + metaBuilder::setQos0NextSeq, qos0MsgList, reader, writer); if (dropCount > 0) { dropCounts.put(QoS.AT_MOST_ONCE, dropCount); } @@ -1334,7 +1476,7 @@ private Map insertInbox(ByteString inboxKeyPrefix, long nextSeq = metaBuilder.getSendBufferNextSeq(); int dropCount = insertToInbox(inboxKeyPrefix, startSeq, nextSeq, metaBuilder.getLimit(), false, KVSchemaUtil::bufferedMsgKey, metaBuilder::setSendBufferStartSeq, metaBuilder::setSendBufferNextSeq, - bufferedMsgList, itr, writer); + bufferedMsgList, reader, writer); if (dropCount > 0) { dropCounts.put(QoS.AT_LEAST_ONCE, dropCount); } @@ -1352,7 +1494,7 @@ private 
int insertToInbox(ByteString inboxKeyPrefix, Function startSeqSetter, Function nextSeqSetter, List messages, - IKVIterator itr, + IKVRangeReader reader, IKVWriter writer) { int newMsgCount = messages.size(); int currCount = (int) (nextSeq - startSeq); @@ -1370,32 +1512,38 @@ private int insertToInbox(ByteString inboxKeyPrefix, writer.insert(keyGenerator.apply(inboxKeyPrefix, startSeq + dropCount), buildInboxMessageList(startSeq + dropCount, messages).toByteString()); } else { - // drop partially - itr.seekForPrev(keyGenerator.apply(inboxKeyPrefix, startSeq + dropCount)); - long beginSeq = parseSeq(inboxKeyPrefix, itr.key()); - List msgList = ZeroCopyParser.parse(itr.value(), InboxMessageList.parser()) - .getMessageList(); - InboxMessageList.Builder msgListBuilder = InboxMessageList.newBuilder(); - List subMsgList = msgList.subList((int) (startSeq + dropCount - beginSeq), - msgList.size()); - if (!subMsgList.isEmpty()) { - msgListBuilder.addAllMessage(subMsgList).addAllMessage( - buildInboxMessageList(subMsgList.get(subMsgList.size() - 1).getSeq() + 1, - messages).getMessageList()); - } else { - msgListBuilder.addAllMessage( - buildInboxMessageList(startSeq + dropCount, messages).getMessageList()); - } - writer.clear(Boundary.newBuilder().setStartKey(keyGenerator.apply(inboxKeyPrefix, startSeq)) - .setEndKey(keyGenerator.apply(inboxKeyPrefix, startSeq + dropCount)).build()); - if (beginSeq == startSeq + dropCount) { - // override existing key - writer.put(keyGenerator.apply(inboxKeyPrefix, startSeq + dropCount), - msgListBuilder.build().toByteString()); - } else { - // insert new key - writer.insert(keyGenerator.apply(inboxKeyPrefix, startSeq + dropCount), - msgListBuilder.build().toByteString()); + try (IKVIterator itr = reader.iterator(Boundary.newBuilder() + .setStartKey(inboxKeyPrefix) + .setEndKey(upperBound(inboxKeyPrefix)) + .build()) + ) { + // drop partially + itr.seekForPrev(keyGenerator.apply(inboxKeyPrefix, startSeq + dropCount)); + long beginSeq = 
parseSeq(inboxKeyPrefix, itr.key()); + List msgList = ZeroCopyParser.parse(itr.value(), InboxMessageList.parser()) + .getMessageList(); + InboxMessageList.Builder msgListBuilder = InboxMessageList.newBuilder(); + List subMsgList = msgList.subList((int) (startSeq + dropCount - beginSeq), + msgList.size()); + if (!subMsgList.isEmpty()) { + msgListBuilder.addAllMessage(subMsgList).addAllMessage( + buildInboxMessageList(subMsgList.get(subMsgList.size() - 1).getSeq() + 1, + messages).getMessageList()); + } else { + msgListBuilder.addAllMessage( + buildInboxMessageList(startSeq + dropCount, messages).getMessageList()); + } + writer.clear(Boundary.newBuilder().setStartKey(keyGenerator.apply(inboxKeyPrefix, startSeq)) + .setEndKey(keyGenerator.apply(inboxKeyPrefix, startSeq + dropCount)).build()); + if (beginSeq == startSeq + dropCount) { + // override existing key + writer.put(keyGenerator.apply(inboxKeyPrefix, startSeq + dropCount), + msgListBuilder.build().toByteString()); + } else { + // insert new key + writer.insert(keyGenerator.apply(inboxKeyPrefix, startSeq + dropCount), + msgListBuilder.build().toByteString()); + } } } startSeq += dropCount; @@ -1423,10 +1571,14 @@ private int insertToInbox(ByteString inboxKeyPrefix, private InboxMessageList buildInboxMessageList(long beginSeq, List subMessages) { InboxMessageList.Builder listBuilder = InboxMessageList.newBuilder(); for (SubMessage subMessage : subMessages) { - listBuilder.addMessage( - InboxMessage.newBuilder().setSeq(beginSeq).putAllMatchedTopicFilter(subMessage.matchedTopicFilters) - .setMsg(TopicMessage.newBuilder().setTopic(subMessage.topic).setPublisher(subMessage.publisher) - .setMessage(subMessage.message).build()).build()); + listBuilder.addMessage(InboxMessage.newBuilder() + .setSeq(beginSeq) + .putAllMatchedTopicFilter(subMessage.matchedTopicFilters) + .setMsg(TopicMessage.newBuilder() + .setTopic(subMessage.topic) + .setPublisher(subMessage.publisher) + .setMessage(subMessage.message) + 
.build()).build()); beginSeq++; } return listBuilder.build(); @@ -1436,12 +1588,12 @@ private InboxMessageList buildInboxMessageList(long beginSeq, List s private Runnable batchCommit(BatchCommitRequest request, BatchCommitReply.Builder replyBuilder, boolean isLeader, - IKVReader reader, + IKVRangeReader reader, IKVWriter writer) { Map> toBeCached = new HashMap<>(); for (BatchCommitRequest.Params params : request.getParamsList()) { Optional metadataOpt = inboxMetaCache.get(params.getTenantId(), params.getInboxId(), - params.getVersion().getIncarnation(), this.inboxMetadataProvider(reader)); + params.getVersion().getIncarnation(), inboxMetadataProvider(reader)); if (metadataOpt.isEmpty()) { replyBuilder.addCode(BatchCommitReply.Code.NO_INBOX); continue; @@ -1466,7 +1618,7 @@ private Runnable batchCommit(BatchCommitRequest request, private void commitInbox(ByteString scopedInboxId, BatchCommitRequest.Params params, InboxMetadata.Builder metaBuilder, - IKVReader reader, + IKVRangeReader reader, IKVWriter writer) { if (params.hasQos0UpToSeq()) { long startSeq = metaBuilder.getQos0StartSeq(); @@ -1491,9 +1643,28 @@ private void commitToInbox(ByteString scopedInboxId, long commitSeq, BiFunction keyGenerator, Function metadataSetter, - IKVReader reader, + IKVRangeReader reader, IKVWriter writer) { if (startSeq <= commitSeq && commitSeq < nextSeq) { + // Fast path 1: delete directly when startSeq equals commitSeq + // This path favors performance by removing the chunk at startSeq + if (startSeq == commitSeq) { + writer.delete(keyGenerator.apply(scopedInboxId, startSeq)); + metadataSetter.apply(startSeq + 1); + return; + } + + // Fast path 2: use deleteRange when commitSeq + 1 exists + // This removes all chunks whose key is in [startSeq, commitSeq + 1) + Optional nextChunk = reader.get(keyGenerator.apply(scopedInboxId, commitSeq + 1)); + if (nextChunk.isPresent()) { + writer.clear(Boundary.newBuilder() + .setStartKey(keyGenerator.apply(scopedInboxId, startSeq)) + 
.setEndKey(keyGenerator.apply(scopedInboxId, commitSeq + 1)) + .build()); + metadataSetter.apply(commitSeq + 1); + return; + } while (startSeq <= commitSeq) { ByteString msgKey = keyGenerator.apply(scopedInboxId, startSeq); Optional msgListData = reader.get(msgKey); @@ -1519,52 +1690,56 @@ private void commitToInbox(ByteString scopedInboxId, } } - private CompletableFuture expireTenant(ExpireTenantRequest request, IKVReader reader) { - reader.refresh(); - IKVIterator itr = reader.iterator(); - return expireTenant(request.getTenantId(), ofSeconds(request.getExpirySeconds()), request.getNow(), itr) + private CompletableFuture expireTenant(ExpireTenantRequest request, IKVRangeReader reader) { + return expireTenant(request.getTenantId(), ofSeconds(request.getExpirySeconds()), request.getNow(), reader) .thenApply(v -> ExpireTenantReply.newBuilder().build()); } - private CompletableFuture expireTenant(String tenantId, Duration expireTime, long now, IKVIterator itr) { + private CompletableFuture expireTenant(String tenantId, Duration expireTime, long now, + IKVRangeReader reader) { String inboxId = null; ByteString beginKeyPrefix = tenantBeginKeyPrefix(tenantId); SortedMap inboxVersions = new TreeMap<>(); List> onlineCheckFutures = new LinkedList<>(); int probe = 0; - for (itr.seek(beginKeyPrefix); itr.isValid() && itr.key().startsWith(beginKeyPrefix); ) { - if (isInboxInstanceStartKey(itr.key())) { - try { - InboxMetadata inboxMetadata = InboxMetadata.parseFrom(itr.value()); - if (inboxId == null) { - inboxId = inboxMetadata.getInboxId(); - inboxVersions.put(inboxMetadata.getIncarnation(), inboxMetadata); - } else if (inboxId.equals(inboxMetadata.getInboxId())) { - inboxVersions.put(inboxMetadata.getIncarnation(), inboxMetadata); - } else { - // collect all inbox versions - checkInboxOnline(tenantId, expireTime, now, inboxVersions, onlineCheckFutures); - // move on to next inbox - inboxVersions.clear(); - inboxId = inboxMetadata.getInboxId(); - 
inboxVersions.put(inboxMetadata.getIncarnation(), inboxMetadata); + try (IKVIterator itr = reader.iterator(Boundary.newBuilder() + .setStartKey(beginKeyPrefix) + .setEndKey(upperBound(beginKeyPrefix)) + .build())) { + for (itr.seek(beginKeyPrefix); itr.isValid() && itr.key().startsWith(beginKeyPrefix); ) { + if (isInboxInstanceStartKey(itr.key())) { + try { + InboxMetadata inboxMetadata = InboxMetadata.parseFrom(itr.value()); + if (inboxId == null) { + inboxId = inboxMetadata.getInboxId(); + inboxVersions.put(inboxMetadata.getIncarnation(), inboxMetadata); + } else if (inboxId.equals(inboxMetadata.getInboxId())) { + inboxVersions.put(inboxMetadata.getIncarnation(), inboxMetadata); + } else { + // collect all inbox versions + checkInboxOnline(tenantId, expireTime, now, inboxVersions, onlineCheckFutures); + // move on to next inbox + inboxVersions.clear(); + inboxId = inboxMetadata.getInboxId(); + inboxVersions.put(inboxMetadata.getIncarnation(), inboxMetadata); + } + } catch (InvalidProtocolBufferException e) { + log.error("Unexpected error", e); + } finally { + itr.next(); + probe++; } - } catch (InvalidProtocolBufferException e) { - log.error("Unexpected error", e); - } finally { - itr.next(); - probe++; - } - } else { - if (probe < 20) { - itr.next(); - probe++; } else { - if (isInboxInstanceKey(itr.key())) { - itr.seek(upperBound(parseInboxInstanceStartKeyPrefix(itr.key()))); - } else { + if (probe < 20) { itr.next(); probe++; + } else { + if (isInboxInstanceKey(itr.key())) { + itr.seek(upperBound(parseInboxInstanceStartKeyPrefix(itr.key()))); + } else { + itr.next(); + probe++; + } } } } @@ -1698,68 +1873,79 @@ private boolean hasExpired(InboxMetadata metadata, long nowTS) { return Duration.ofMillis(metadata.getDetachedAt()).plusSeconds(metadata.getExpirySeconds()).toMillis() < nowTS; } - private Optional getLatestInboxVersion(String tenantId, String inboxId, IKVReader reader, - IKVIterator itr) { + private Optional getLatestInboxVersion(String tenantId, String 
inboxId, IKVRangeReader reader) { ByteString inboxStartKey = inboxStartKeyPrefix(tenantId, inboxId); - itr.seekForPrev(upperBound(inboxStartKey)); - if (!itr.isValid() || !itr.key().startsWith(inboxStartKey)) { - return Optional.empty(); - } - Optional inboxMetaBytes = reader.get(parseInboxInstanceStartKeyPrefix(itr.key())); - if (inboxMetaBytes.isEmpty()) { - log.warn("Inconsistent state: missing inbox meta: tenantId={}, inboxId={}", tenantId, inboxId); - return Optional.empty(); - } - try { - return Optional.of(InboxMetadata.parseFrom(inboxMetaBytes.get())); - } catch (InvalidProtocolBufferException e) { - log.error("Unexpected error", e); - return Optional.empty(); + try (IKVIterator itr = reader.iterator(Boundary.newBuilder() + .setStartKey(inboxStartKey) + .setEndKey(upperBound(inboxStartKey)) + .build()) + ) { + itr.seekToLast(); + if (!itr.isValid() || !itr.key().startsWith(inboxStartKey)) { + return Optional.empty(); + } + Optional inboxMetaBytes = reader.get(parseInboxInstanceStartKeyPrefix(itr.key())); + if (inboxMetaBytes.isEmpty()) { + log.warn("Inconsistent state: missing inbox meta: tenantId={}, inboxId={}", tenantId, inboxId); + return Optional.empty(); + } + try { + return Optional.of(InboxMetadata.parseFrom(inboxMetaBytes.get())); + } catch (InvalidProtocolBufferException e) { + log.error("Unexpected error", e); + return Optional.empty(); + } } } - private SortedMap getAllInboxVersions(String tenantId, String inboxId, IKVIterator itr) { + private SortedMap getAllInboxVersions(String tenantId, String inboxId, IKVRangeReader reader) { int probe = 0; SortedMap inboxInstances = new ConcurrentSkipListMap<>(); ByteString inboxStartKey = inboxStartKeyPrefix(tenantId, inboxId); - for (itr.seek(inboxStartKey); itr.isValid(); ) { - if (itr.key().startsWith(inboxStartKey)) { - if (isInboxInstanceStartKey(itr.key())) { - probe = 0; - try { - InboxMetadata inboxMetadata = InboxMetadata.parseFrom(itr.value()); - inboxInstances.put(inboxMetadata.getIncarnation(), 
inboxMetadata); - } catch (InvalidProtocolBufferException e) { - log.error("Unexpected error", e); - } finally { - itr.next(); - probe++; - } - } else { - if (probe < 20) { - itr.next(); - probe++; + try (IKVIterator itr = reader.iterator(Boundary.newBuilder() + .setStartKey(inboxStartKey) + .setEndKey(upperBound(inboxStartKey)) + .build()) + ) { + for (itr.seek(inboxStartKey); itr.isValid(); ) { + if (itr.key().startsWith(inboxStartKey)) { + if (isInboxInstanceStartKey(itr.key())) { + probe = 0; + try { + InboxMetadata inboxMetadata = InboxMetadata.parseFrom(itr.value()); + inboxInstances.put(inboxMetadata.getIncarnation(), inboxMetadata); + } catch (InvalidProtocolBufferException e) { + log.error("Unexpected error", e); + } finally { + itr.next(); + probe++; + } } else { - if (isInboxInstanceKey(itr.key())) { - itr.seek(upperBound(parseInboxInstanceStartKeyPrefix(itr.key()))); - } else { + if (probe < 20) { itr.next(); probe++; + } else { + if (isInboxInstanceKey(itr.key())) { + itr.seek(upperBound(parseInboxInstanceStartKeyPrefix(itr.key()))); + } else { + itr.next(); + probe++; + } } } + } else { + break; } - } else { - break; } } return inboxInstances; } - private IInboxMetaCache.InboxMetadataProvider inboxMetadataProvider(IKVReader reader) { + private IInboxMetaCache.InboxMetadataProvider inboxMetadataProvider(IKVRangeReader reader) { return (tenantId, inboxId, incarnation) -> getInboxVersion(tenantId, inboxId, incarnation, reader); } - private InboxMetadata getInboxVersion(String tenantId, String inboxId, long incarnation, IKVReader reader) { + private InboxMetadata getInboxVersion(String tenantId, String inboxId, long incarnation, IKVRangeReader reader) { ByteString inboxInstanceMetaKey = inboxInstanceStartKey(tenantId, inboxId, incarnation); Optional metaBytes = reader.get(inboxInstanceMetaKey); if (metaBytes.isEmpty()) { diff --git a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProcFactory.java 
b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProcFactory.java index 9cf4d83ab..a759f44c1 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProcFactory.java +++ b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProcFactory.java @@ -28,18 +28,13 @@ import com.google.protobuf.ByteString; import java.time.Duration; -import java.util.Collections; -import java.util.List; import java.util.Optional; import java.util.function.Supplier; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; import org.apache.bifromq.basekv.store.api.IKVRangeCoProcFactory; -import org.apache.bifromq.basekv.store.api.IKVRangeSplitHinter; -import org.apache.bifromq.basekv.store.range.hinter.MutationKVLoadBasedSplitHinter; -import org.apache.bifromq.basekv.utils.KVRangeIdUtil; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.dist.client.IDistClient; import org.apache.bifromq.inbox.client.IInboxClient; import org.apache.bifromq.plugin.eventcollector.IEventCollector; @@ -58,7 +53,6 @@ public class InboxStoreCoProcFactory implements IKVRangeCoProcFactory { private final IResourceThrottler resourceThrottler; private final Duration detachTimeout; private final Duration metaCacheExpireTime; - private final Duration loadEstWindow; private final int expireRateLimit; @@ -71,7 +65,6 @@ public InboxStoreCoProcFactory(IDistClient distClient, IResourceThrottler resourceThrottler, Duration detachTimeout, Duration metaCacheExpireTime, - Duration loadEstimateWindow, int expireRateLimit) { this.distClient = distClient; this.inboxClient = inboxClient; @@ -82,34 +75,27 @@ public InboxStoreCoProcFactory(IDistClient distClient, this.resourceThrottler = 
resourceThrottler; this.detachTimeout = detachTimeout; this.metaCacheExpireTime = metaCacheExpireTime; - this.loadEstWindow = loadEstimateWindow; this.expireRateLimit = expireRateLimit; } @Override - public List createHinters(String clusterId, String storeId, KVRangeId id, - Supplier rangeReaderProvider) { - // load-based hinter only split range around up to the inbox bucket boundary - return Collections.singletonList(new MutationKVLoadBasedSplitHinter(loadEstWindow, key -> { - ByteString splitKey = upperBound(parseInboxBucketPrefix(key)); - if (splitKey != null) { - Boundary boundary = rangeReaderProvider.get().boundary(); - if (compareStartKey(startKey(boundary), splitKey) < 0 - && compareEndKeys(splitKey, endKey(boundary)) < 0) { - return Optional.of(splitKey); - } + public Optional toSplitKey(ByteString key, Boundary boundary) { + // align split key to inbox bucket upper bound within the current boundary + ByteString splitKey = upperBound(parseInboxBucketPrefix(key)); + if (splitKey != null) { + if (compareStartKey(startKey(boundary), splitKey) < 0 + && compareEndKeys(splitKey, endKey(boundary)) < 0) { + return Optional.of(splitKey); } - return Optional.empty(); - }, - "clusterId", clusterId, "storeId", storeId, "rangeId", - KVRangeIdUtil.toString(id))); + } + return Optional.empty(); } @Override public IKVRangeCoProc createCoProc(String clusterId, String storeId, KVRangeId id, - Supplier rangeReaderProvider) { + Supplier rangeReaderProvider) { return new InboxStoreCoProc(clusterId, storeId, id, diff --git a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/TenantStats.java b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/TenantStats.java index 8536fd19e..837b1434a 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/TenantStats.java +++ b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/TenantStats.java @@ -27,7 +27,7 @@ import 
java.util.function.Supplier; import org.apache.bifromq.metrics.ITenantMeter; -public class TenantStats { +class TenantStats { private final LongAdder sessionCount = new LongAdder(); private final LongAdder subCount = new LongAdder(); diff --git a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/TenantsStats.java b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/TenantsStats.java index 654b411d6..344fa3a7a 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/TenantsStats.java +++ b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/TenantsStats.java @@ -37,29 +37,28 @@ import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.StampedLock; import java.util.function.Supplier; import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVIterator; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.inbox.storage.proto.InboxMetadata; @Slf4j public class TenantsStats implements ITenantStats { private final Map tenantStatsMap = new ConcurrentHashMap<>(); - private final Supplier readerSupplier; + private final Supplier readerSupplier; private final String[] tags; // ultra-simple async queue and single drainer private final ConcurrentLinkedQueue taskQueue = new ConcurrentLinkedQueue<>(); private final AtomicBoolean draining = new AtomicBoolean(false); - private transient Boundary boundary; + private final AtomicBoolean closed = new AtomicBoolean(false); + private final StampedLock closeLock = new StampedLock(); - TenantsStats(Supplier readerSupplier, String... tags) { + TenantsStats(Supplier readerSupplier, String... 
tags) { this.readerSupplier = readerSupplier; this.tags = tags; - try (IKVCloseableReader reader = readerSupplier.get()) { - boundary = reader.boundary(); - } } @Override @@ -88,18 +87,25 @@ public void reset(Boundary boundary) { @Override public void close() { - CompletableFuture closeFuture = new CompletableFuture<>(); - // Ensure gauges are unregistered and internal state cleared on close - taskQueue.offer(() -> { - try { - tenantStatsMap.values().forEach(TenantStats::destroy); - tenantStatsMap.clear(); - } finally { - closeFuture.complete(null); + long stamp = closeLock.writeLock(); + try { + if (closed.compareAndSet(false, true)) { + CompletableFuture closeFuture = new CompletableFuture<>(); + // Ensure gauges are unregistered and internal state cleared on close + taskQueue.offer(() -> { + try { + tenantStatsMap.values().forEach(TenantStats::destroy); + tenantStatsMap.clear(); + } finally { + closeFuture.complete(null); + } + }); + trigger(); + closeFuture.join(); } - }); - trigger(); - closeFuture.join(); + } finally { + closeLock.unlock(stamp); + } } private void trigger() { @@ -166,10 +172,15 @@ private void doAddSubCount(String tenantId, int delta) { private Supplier getTenantUsedSpaceProvider(String tenantId) { return () -> { - try (IKVCloseableReader reader = readerSupplier.get()) { + long stamp = closeLock.readLock(); + if (closed.get()) { + closeLock.unlock(stamp); + return 0; + } + try (IKVRangeRefreshableReader reader = readerSupplier.get()) { ByteString startKey = tenantBeginKeyPrefix(tenantId); ByteString endKey = upperBound(startKey); - Boundary tenantBoundary = intersect(boundary, toBoundary(startKey, endKey)); + Boundary tenantBoundary = intersect(reader.boundary(), toBoundary(startKey, endKey)); if (isNULLRange(tenantBoundary)) { return 0; } @@ -177,6 +188,8 @@ private Supplier getTenantUsedSpaceProvider(String tenantId) { } catch (Exception e) { log.error("Failed to get used space for tenant:{}", tenantId, e); return 0; + } finally { + 
closeLock.unlock(stamp); } }; } @@ -184,10 +197,8 @@ private Supplier getTenantUsedSpaceProvider(String tenantId) { private void doReset(Boundary boundary) { tenantStatsMap.values().forEach(TenantStats::destroy); tenantStatsMap.clear(); - try (IKVCloseableReader reader = readerSupplier.get()) { - this.boundary = boundary; + try (IKVRangeRefreshableReader reader = readerSupplier.get(); IKVIterator itr = reader.iterator()) { reader.refresh(); - IKVIterator itr = reader.iterator(); for (itr.seekToFirst(); itr.isValid(); ) { String tenantId = parseTenantId(itr.key()); loadStats(tenantId, itr); diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxDeleteClearAllTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxDeleteClearAllTest.java new file mode 100644 index 000000000..91dd24f0f --- /dev/null +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxDeleteClearAllTest.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.inbox.store; + +import static org.testng.Assert.assertEquals; + +import org.apache.bifromq.inbox.storage.proto.BatchAttachRequest; +import org.apache.bifromq.inbox.storage.proto.BatchDeleteReply; +import org.apache.bifromq.inbox.storage.proto.BatchDeleteRequest; +import org.apache.bifromq.inbox.storage.proto.BatchFetchRequest; +import org.apache.bifromq.inbox.storage.proto.BatchSubRequest; +import org.apache.bifromq.inbox.storage.proto.Fetched; +import org.apache.bifromq.inbox.storage.proto.InboxVersion; +import org.apache.bifromq.inbox.storage.proto.InsertRequest; +import org.apache.bifromq.inbox.storage.proto.MatchedRoute; +import org.apache.bifromq.inbox.storage.proto.SubMessagePack; +import org.apache.bifromq.type.ClientInfo; +import org.apache.bifromq.type.QoS; +import org.apache.bifromq.type.TopicFilterOption; +import org.apache.bifromq.type.TopicMessagePack; +import org.testng.annotations.Test; + +public class InboxDeleteClearAllTest extends InboxStoreTest { + + @Test(groups = "integration") + public void clearInboxRemovesAllMessages() { + String tenantId = "tenant-" + System.nanoTime(); + String inboxId = "inbox-" + System.nanoTime(); + long incarnation = System.nanoTime(); + ClientInfo client = ClientInfo.newBuilder().setTenantId(tenantId).build(); + + InboxVersion version = requestAttach(BatchAttachRequest.Params.newBuilder() + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setExpirySeconds(30) + .setLimit(1000) + .setClient(client) + .setNow(0) + .build()).get(0); + + // subscribe QoS0 and QoS1 filters + requestSub(BatchSubRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setVersion(version) + .setTopicFilter("/qos0") + .setOption(TopicFilterOption.newBuilder().setIncarnation(1L).setQos(QoS.AT_MOST_ONCE).build()) + .setMaxTopicFilters(100) + .setNow(0) + .build()); + requestSub(BatchSubRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setVersion(version) 
+ .setTopicFilter("/qos1") + .setOption(TopicFilterOption.newBuilder().setIncarnation(1L).setQos(QoS.AT_LEAST_ONCE).build()) + .setMaxTopicFilters(100) + .setNow(0) + .build()); + + // insert many messages in multiple chunks for both queues + for (int i = 0; i < 200; i++) { + requestInsert(InsertRequest.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .addMessagePack(SubMessagePack.newBuilder() + .addMatchedRoute(MatchedRoute.newBuilder().setTopicFilter("/qos0").setIncarnation(1L).build()) + .setMessages(TopicMessagePack.newBuilder() + .setTopic("/qos0") + .addMessage(message(QoS.AT_MOST_ONCE, "m0-" + i)) + .build()) + .build()) + .addMessagePack(SubMessagePack.newBuilder() + .addMatchedRoute(MatchedRoute.newBuilder().setTopicFilter("/qos1").setIncarnation(1L).build()) + .setMessages(TopicMessagePack.newBuilder() + .setTopic("/qos1") + .addMessage(message(QoS.AT_LEAST_ONCE, "m1-" + i)) + .build()) + .build()) + .build()); + } + + // delete inbox + BatchDeleteReply.Result delResult = requestDelete(BatchDeleteRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setVersion(version) + .build()).get(0); + assertEquals(delResult.getCode(), BatchDeleteReply.Code.OK); + + // fetch after delete should return NO_INBOX + Fetched fetched = requestFetch(BatchFetchRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setMaxFetch(1000) + .build()).get(0); + assertEquals(fetched.getResult(), Fetched.Result.NO_INBOX); + } +} + diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertNewFormatTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertNewFormatTest.java new file mode 100644 index 000000000..4ffa83743 --- /dev/null +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertNewFormatTest.java @@ -0,0 +1,116 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.inbox.store; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import java.util.List; +import java.util.concurrent.ThreadLocalRandom; +import org.apache.bifromq.inbox.storage.proto.BatchAttachRequest; +import org.apache.bifromq.inbox.storage.proto.BatchFetchRequest; +import org.apache.bifromq.inbox.storage.proto.BatchInsertRequest; +import org.apache.bifromq.inbox.storage.proto.Fetched; +import org.apache.bifromq.inbox.storage.proto.InboxServiceRWCoProcInput; +import org.apache.bifromq.inbox.storage.proto.InboxServiceRWCoProcOutput; +import org.apache.bifromq.inbox.storage.proto.InboxVersion; +import org.apache.bifromq.inbox.storage.proto.InsertResult; +import org.apache.bifromq.inbox.storage.proto.MatchedRoute; +import org.apache.bifromq.type.ClientInfo; +import org.apache.bifromq.type.Message; +import org.apache.bifromq.type.QoS; +import org.apache.bifromq.type.TopicFilterOption; +import org.apache.bifromq.type.TopicMessagePack; +import org.testng.annotations.Test; + +public class InboxInsertNewFormatTest extends InboxStoreTest { + @Test(groups = 
"integration") + public void insertWithMessagePool() { + long now = 0; + String tenantId = "tenantId-" + System.nanoTime(); + String inboxId = "inboxId-" + System.nanoTime(); + long incarnation = System.nanoTime(); + String topic = "/a/b/c"; + ClientInfo client = ClientInfo.newBuilder().setTenantId(tenantId).build(); + InboxVersion version = requestAttach(BatchAttachRequest.Params.newBuilder() + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setExpirySeconds(10) + .setLimit(10) + .setClient(client) + .setNow(now) + .build()).get(0); + + requestSub(org.apache.bifromq.inbox.storage.proto.BatchSubRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setVersion(version) + .setTopicFilter(topic) + .setOption(TopicFilterOption.newBuilder().setIncarnation(1L).setQos(QoS.AT_LEAST_ONCE).build()) + .setMaxTopicFilters(100) + .setNow(now) + .build()); + + TopicMessagePack.PublisherPack pubPack = TopicMessagePack.PublisherPack.newBuilder() + .addMessage(Message.newBuilder().setMessageId(System.nanoTime()).setPubQoS(QoS.AT_LEAST_ONCE).build()) + .build(); + TopicMessagePack topicMessagePack = TopicMessagePack.newBuilder().setTopic(topic).addMessage(pubPack).build(); + + BatchInsertRequest.SubRef subRef = BatchInsertRequest.SubRef.newBuilder() + .addMatchedRoute(MatchedRoute.newBuilder().setTopicFilter(topic).setIncarnation(1L).build()) + .setMessagePackIndex(0) + .build(); + + BatchInsertRequest.InsertRef insertRef = BatchInsertRequest.InsertRef.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .addSubRef(subRef) + .build(); + + BatchInsertRequest req = BatchInsertRequest.newBuilder() + .addTopicMessagePack(topicMessagePack) + .addInsertRef(insertRef) + .build(); + + long reqId = ThreadLocalRandom.current().nextInt(); + ByteString routeKey = org.apache.bifromq.inbox.store.schema.KVSchemaUtil.inboxStartKeyPrefix(tenantId, inboxId); + InboxServiceRWCoProcInput input = 
MessageUtil.buildInsertRequest(reqId, req); + InboxServiceRWCoProcOutput output = mutate(routeKey, input); + assertTrue(output.hasBatchInsert()); + assertEquals(output.getReqId(), reqId); + List results = output.getBatchInsert().getResultList(); + assertEquals(results.size(), 1); + assertEquals(results.get(0).getCode(), InsertResult.Code.OK); + + Fetched fetched = requestFetch(BatchFetchRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setMaxFetch(10) + .build()).get(0); + if (fetched.getResult() == Fetched.Result.OK) { + assertEquals(fetched.getSendBufferMsgCount(), 1); + assertEquals(fetched.getSendBufferMsg(0).getMsg().getMessage(), pubPack.getMessage(0)); + } + } +} + diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java index e70fe507c..76009309a 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java @@ -360,7 +360,6 @@ protected void fetchWithMaxLimit(QoS qos) { .setMaxFetch(1) .build()) .get(0); - assertEquals(msgCountGetter(qos).apply(fetched), 1); assertEquals(msgGetter(qos).apply(fetched, 0).getMsg().getMessage(), msg1.getMessage(0)); fetched = requestFetch(BatchFetchRequest.Params.newBuilder() @@ -465,9 +464,10 @@ protected void fetchWithStartAfter(QoS qos) { if (qos == AT_MOST_ONCE) { assertEquals(msgCountGetter(qos).apply(fetched), 5); } else { - // limit only apply buffered messages - assertEquals(msgCountGetter(qos).apply(fetched), 1); + // return whole chunk even if exceeding maxFetch + assertEquals(msgCountGetter(qos).apply(fetched), 2); assertEquals(msgGetter(qos).apply(fetched, 0).getMsg().getMessage(), msg2.getMessage(0)); + assertEquals(msgGetter(qos).apply(fetched, 
1).getMsg().getMessage(), msg3.getMessage(0)); } paramsBuilder = BatchFetchRequest.Params.newBuilder() @@ -1056,5 +1056,161 @@ private BiFunction msgGetter(QoS qos) { default -> Fetched::getSendBufferMsg; }; } -} + @Test(groups = "integration") + public void commitDeleteRangeSendBuffer() { + long now = 0; + String tenantId = "tenantId-" + System.nanoTime(); + String inboxId = "inboxId-" + System.nanoTime(); + long incarnation = System.nanoTime(); + String topicFilter = "/a/b/c"; + ClientInfo client = ClientInfo.newBuilder().setTenantId(tenantId).build(); + InboxVersion inboxVersion = requestAttach(BatchAttachRequest.Params.newBuilder() + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setExpirySeconds(2) + .setLimit(10) + .setClient(client) + .setNow(now) + .build()) + .get(0); + + requestSub(BatchSubRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setVersion(inboxVersion) + .setTopicFilter(topicFilter) + .setOption(TopicFilterOption.newBuilder().setQos(QoS.AT_LEAST_ONCE).build()) + .setMaxTopicFilters(100) + .setNow(now) + .build()); + + TopicMessagePack.PublisherPack msg0 = message(QoS.AT_LEAST_ONCE, "m0"); + TopicMessagePack.PublisherPack msg1 = message(QoS.AT_LEAST_ONCE, "m1"); + TopicMessagePack.PublisherPack msg2 = message(QoS.AT_LEAST_ONCE, "m2"); + + // insert 3 messages in 3 requests so keys exist at seq=0,1,2 + requestInsert(InsertRequest.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .addMessagePack(SubMessagePack.newBuilder() + .addMatchedRoute(MatchedRoute.newBuilder().setTopicFilter(topicFilter).setIncarnation(0L).build()) + .setMessages(TopicMessagePack.newBuilder().setTopic(topicFilter).addMessage(msg0).build()) + .build()) + .build()); + + requestInsert(InsertRequest.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .addMessagePack(SubMessagePack.newBuilder() + 
.addMatchedRoute(MatchedRoute.newBuilder().setTopicFilter(topicFilter).setIncarnation(0L).build()) + .setMessages(TopicMessagePack.newBuilder().setTopic(topicFilter).addMessage(msg1).build()) + .build()) + .build()); + + requestInsert(InsertRequest.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .addMessagePack(SubMessagePack.newBuilder() + .addMatchedRoute(MatchedRoute.newBuilder().setTopicFilter(topicFilter).setIncarnation(0L).build()) + .setMessages(TopicMessagePack.newBuilder().setTopic(topicFilter).addMessage(msg2).build()) + .build()) + .build()); + + // commit up to seq=1, should delete range [0,2) + BatchCommitReply.Code commitCode = requestCommit(BatchCommitRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setVersion(inboxVersion) + .setSendBufferUpToSeq(1) + .setNow(now) + .build()).get(0); + assertEquals(commitCode, BatchCommitReply.Code.OK); + + Fetched fetched = requestFetch(BatchFetchRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setMaxFetch(10) + .build()).get(0); + assertEquals(fetched.getSendBufferMsgCount(), 1); + assertEquals(fetched.getSendBufferMsg(0).getSeq(), 2); + assertEquals(fetched.getSendBufferMsg(0).getMsg().getMessage(), msg2.getMessage(0)); + } + + @Test(groups = "integration") + public void commitDirectDeleteSendBuffer() { + long now = 0; + String tenantId = "tenantId-" + System.nanoTime(); + String inboxId = "inboxId-" + System.nanoTime(); + long incarnation = System.nanoTime(); + String topicFilter = "/a/b/c"; + ClientInfo client = ClientInfo.newBuilder().setTenantId(tenantId).build(); + InboxVersion inboxVersion = requestAttach(BatchAttachRequest.Params.newBuilder() + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setExpirySeconds(2) + .setLimit(10) + .setClient(client) + .setNow(now) + .build()) + .get(0); + + requestSub(BatchSubRequest.Params.newBuilder() + .setTenantId(tenantId) + 
.setInboxId(inboxId) + .setVersion(inboxVersion) + .setTopicFilter(topicFilter) + .setOption(TopicFilterOption.newBuilder().setQos(QoS.AT_LEAST_ONCE).build()) + .setMaxTopicFilters(100) + .setNow(now) + .build()); + + TopicMessagePack.PublisherPack msg0 = message(QoS.AT_LEAST_ONCE, "m0"); + TopicMessagePack.PublisherPack msg1 = message(QoS.AT_LEAST_ONCE, "m1"); + + // insert 2 messages in 2 requests so keys exist at seq=0 and seq=1 + requestInsert(InsertRequest.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .addMessagePack(SubMessagePack.newBuilder() + .addMatchedRoute(MatchedRoute.newBuilder().setTopicFilter(topicFilter).setIncarnation(0L).build()) + .setMessages(TopicMessagePack.newBuilder().setTopic(topicFilter).addMessage(msg0).build()) + .build()) + .build()); + + requestInsert(InsertRequest.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .addMessagePack(SubMessagePack.newBuilder() + .addMatchedRoute(MatchedRoute.newBuilder().setTopicFilter(topicFilter).setIncarnation(0L).build()) + .setMessages(TopicMessagePack.newBuilder().setTopic(topicFilter).addMessage(msg1).build()) + .build()) + .build()); + + // commit where startSeq equals commitSeq(0), should directly delete key at seq=0 + BatchCommitReply.Code commitCode = requestCommit(BatchCommitRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setVersion(inboxVersion) + .setSendBufferUpToSeq(0) + .setNow(now) + .build()).get(0); + assertEquals(commitCode, BatchCommitReply.Code.OK); + + Fetched fetched = requestFetch(BatchFetchRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setMaxFetch(10) + .build()).get(0); + assertEquals(fetched.getSendBufferMsgCount(), 1); + assertEquals(fetched.getSendBufferMsg(0).getSeq(), 1); + assertEquals(fetched.getSendBufferMsg(0).getMsg().getMessage(), msg1.getMessage(0)); + } +} diff --git 
a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java index e33c807de..a2dc717ce 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java @@ -21,6 +21,9 @@ import static org.apache.bifromq.basekv.client.KVRangeRouterUtil.findByBoundary; import static org.apache.bifromq.basekv.client.KVRangeRouterUtil.findByKey; +import static org.apache.bifromq.basekv.localengine.StructUtil.toValue; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.inboxStartKeyPrefix; import static org.apache.bifromq.metrics.TenantMetric.MqttPersistentSessionNumGauge; @@ -34,6 +37,7 @@ import static org.testng.Assert.assertTrue; import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Meter; import io.micrometer.core.instrument.Metrics; @@ -66,8 +70,6 @@ import org.apache.bifromq.baseenv.EnvProvider; import org.apache.bifromq.basekv.client.IBaseKVStoreClient; import org.apache.bifromq.basekv.client.KVRangeSetting; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngineConfigurator; import org.apache.bifromq.basekv.metaservice.IBaseKVMetaService; import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; import org.apache.bifromq.basekv.store.proto.KVRangeROReply; @@ -185,12 +187,18 @@ public void 
setup() throws IOException { String uuid = UUID.randomUUID().toString(); options = new KVRangeStoreOptions(); - ((RocksDBCPableKVEngineConfigurator) options.getDataEngineConfigurator()) - .dbCheckpointRootDir(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME, uuid) - .toString()) - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME, uuid).toString()); - ((RocksDBWALableKVEngineConfigurator) options.getWalEngineConfigurator()) - .dbRootDir(Paths.get(dbRootDir.toString(), DB_WAL_NAME, uuid).toString()); + Struct dataConf = options.getDataEngineConf().toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_NAME, uuid).toString())) + .putFields(DB_CHECKPOINT_ROOT_DIR, + toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME, uuid).toString())) + .build(); + options.setDataEngineType(options.getDataEngineType()); + options.setDataEngineConf(dataConf); + Struct walConf = options.getWalEngineConf().toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_WAL_NAME, uuid).toString())) + .build(); + options.setWalEngineType(options.getWalEngineType()); + options.setWalEngineConf(walConf); queryExecutor = new ThreadPoolExecutor(2, 2, 0L, TimeUnit.MILLISECONDS, new LinkedTransferQueue<>(), EnvProvider.INSTANCE.newThreadFactory("query-executor")); @@ -295,8 +303,7 @@ private InboxServiceROCoProcOutput query(KVRangeSetting s, InboxServiceROCoProcI return reply.getRoCoProcResult().getInboxService(); } - - private InboxServiceRWCoProcOutput mutate(ByteString routeKey, InboxServiceRWCoProcInput input) { + protected InboxServiceRWCoProcOutput mutate(ByteString routeKey, InboxServiceRWCoProcInput input) { KVRangeSetting s = findByKey(routeKey, storeClient.latestEffectiveRouter()).get(); KVRangeRWReply reply = storeClient.execute(s.leader(), KVRangeRWRequest.newBuilder() .setReqId(input.getReqId()) @@ -381,8 +388,8 @@ protected List requestDetach(BatchDetachRequest.Params... 
ByteString routeKey = inboxStartKeyPrefix(params[0].getTenantId(), params[0].getInboxId()); KVRangeSetting s = findByKey(routeKey, storeClient.latestEffectiveRouter()).get(); InboxServiceRWCoProcInput input = MessageUtil.buildDetachRequest(reqId, BatchDetachRequest.newBuilder() - .addAllParams(List.of(params)) - .build()); + .addAllParams(List.of(params)) + .build()); InboxServiceRWCoProcOutput output = mutate(routeKey, input); assertTrue(output.hasBatchDetach()); assertEquals(output.getReqId(), reqId); diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/TenantsStatsTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/TenantsStatsTest.java index 0a225e497..f14aa23c4 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/TenantsStatsTest.java +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/TenantsStatsTest.java @@ -19,6 +19,7 @@ package org.apache.bifromq.inbox.store; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.apache.bifromq.inbox.store.schema.KVSchemaUtil.inboxInstanceStartKey; import static org.apache.bifromq.metrics.TenantMetric.MqttPersistentSessionNumGauge; import static org.apache.bifromq.metrics.TenantMetric.MqttPersistentSessionSpaceGauge; @@ -39,8 +40,10 @@ import java.util.Optional; import java.util.concurrent.TimeUnit; import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; +import org.apache.bifromq.basekv.proto.State; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.store.api.IKVIterator; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.inbox.storage.proto.InboxMetadata; import org.apache.bifromq.metrics.ITenantMeter; import org.apache.bifromq.type.TopicFilterOption; @@ -79,7 +82,7 @@ public void tearDown() { 
@Test public void testAddAndRemoveSessionAndSubCounts() throws Exception { - IKVCloseableReader reader = Mockito.mock(IKVCloseableReader.class); + IKVRangeRefreshableReader reader = Mockito.mock(IKVRangeRefreshableReader.class); Mockito.when(reader.boundary()).thenReturn(Boundary.getDefaultInstance()); Mockito.when(reader.iterator()).thenReturn(new EmptyIterator()); Mockito.doNothing().when(reader).close(); @@ -127,7 +130,7 @@ public void testResetScansAndAggregates() { // sort by key lexicographically as store would entries.sort(Comparator.comparing(e -> e.key, ByteString.unsignedLexicographicalComparator())); - IKVCloseableReader reader = new FakeReader(entries); + IKVRangeRefreshableReader reader = new FakeReader(entries); TenantsStats stats = new TenantsStats(() -> reader, "clusterId", "c1", "storeId", "s1", "rangeId", "r1"); stats.reset(Boundary.getDefaultInstance()); @@ -148,7 +151,7 @@ && findGaugeValue(MqttPersistentSessionNumGauge.metricName, tenantB) == 1.0 @Test public void testCloseUnregistersAllGauges() throws Exception { - IKVCloseableReader reader = Mockito.mock(IKVCloseableReader.class); + IKVRangeRefreshableReader reader = Mockito.mock(IKVRangeRefreshableReader.class); Mockito.when(reader.boundary()).thenReturn(Boundary.getDefaultInstance()); Mockito.when(reader.iterator()).thenReturn(new EmptyIterator()); Mockito.doNothing().when(reader).close(); @@ -186,7 +189,7 @@ && findGauge(MqttPersistentSessionNumGauge.metricName, tenant2) != null @Test public void testToggleBroadcastToMultipleTenants() { - IKVCloseableReader reader = Mockito.mock(IKVCloseableReader.class); + IKVRangeRefreshableReader reader = Mockito.mock(IKVRangeRefreshableReader.class); Mockito.when(reader.boundary()).thenReturn(Boundary.getDefaultInstance()); Mockito.when(reader.iterator()).thenReturn(new EmptyIterator()); TenantsStats stats = new TenantsStats(() -> reader, "clusterId", "c1", "storeId", "s1", "rangeId", "r1"); @@ -229,7 +232,7 @@ && 
findGauge(MqttPersistentSessionNumGauge.metricName, tenantB) == null @Test public void testDemotionKeepsSpaceGauge() { - IKVCloseableReader reader = Mockito.mock(IKVCloseableReader.class); + IKVRangeRefreshableReader reader = Mockito.mock(IKVRangeRefreshableReader.class); Mockito.when(reader.boundary()).thenReturn(Boundary.getDefaultInstance()); Mockito.when(reader.iterator()).thenReturn(new EmptyIterator()); TenantsStats stats = new TenantsStats(() -> reader, "clusterId", "c1", "storeId", "s1", "rangeId", "r1"); @@ -341,6 +344,11 @@ public void seekForPrev(ByteString key) { } idx = i; } + + @Override + public void close() { + + } } private static class EmptyIterator extends FakeIterator { @@ -349,16 +357,36 @@ private static class EmptyIterator extends FakeIterator { } } - private static class FakeReader implements IKVCloseableReader { + private static class FakeReader implements IKVRangeRefreshableReader { private final List entries; FakeReader(List entries) { this.entries = entries; } + @Override + public long version() { + return 0; + } + + @Override + public State state() { + return State.newBuilder().setType(State.StateType.Normal).build(); + } + + @Override + public long lastAppliedIndex() { + return 0; + } + @Override public Boundary boundary() { - return Boundary.getDefaultInstance(); + return FULL_BOUNDARY; + } + + @Override + public ClusterConfig clusterConfig() { + return ClusterConfig.newBuilder().build(); } @Override @@ -381,6 +409,11 @@ public IKVIterator iterator() { return new FakeIterator(entries); } + @Override + public IKVIterator iterator(Boundary boundary) { + return null; + } + @Override public void refresh() {} diff --git a/bifromq-metrics/src/main/java/org/apache/bifromq/metrics/TenantMetric.java b/bifromq-metrics/src/main/java/org/apache/bifromq/metrics/TenantMetric.java index db261ac6c..2c6f7d7ee 100644 --- a/bifromq-metrics/src/main/java/org/apache/bifromq/metrics/TenantMetric.java +++ 
b/bifromq-metrics/src/main/java/org/apache/bifromq/metrics/TenantMetric.java @@ -57,6 +57,11 @@ public enum TenantMetric { MqttTransientFanOutBytes("mqtt.tfanout.bytes", Meter.Type.DISTRIBUTION_SUMMARY), MqttPersistentFanOutBytes("mqtt.pfanout.bytes", Meter.Type.DISTRIBUTION_SUMMARY), + MqttSendingQuota("mqtt.ex.quota", Meter.Type.DISTRIBUTION_SUMMARY), + MqttConfirmingMessages("mqtt.ex.confirming", Meter.Type.DISTRIBUTION_SUMMARY), + MqttResendBytes("mqtt.ex.resend.bytes", Meter.Type.DISTRIBUTION_SUMMARY), + MqttDeDupBytes("mqtt.ex.dedup.bytes", Meter.Type.DISTRIBUTION_SUMMARY), + // subscription related metrics MqttRouteSpaceGauge("mqtt.route.space.gauge", Meter.Type.GAUGE), MqttRouteNumGauge("mqtt.route.num.gauge", Meter.Type.GAUGE), diff --git a/bifromq-mqtt/bifromq-mqtt-broker-client/src/main/java/org/apache/bifromq/mqtt/inbox/DeliveryPipeline.java b/bifromq-mqtt/bifromq-mqtt-broker-client/src/main/java/org/apache/bifromq/mqtt/inbox/DeliveryPipeline.java index c6685362d..87e057e3b 100644 --- a/bifromq-mqtt/bifromq-mqtt-broker-client/src/main/java/org/apache/bifromq/mqtt/inbox/DeliveryPipeline.java +++ b/bifromq-mqtt/bifromq-mqtt-broker-client/src/main/java/org/apache/bifromq/mqtt/inbox/DeliveryPipeline.java @@ -21,8 +21,14 @@ import static org.apache.bifromq.base.util.CompletableFutureUtil.unwrap; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.baserpc.client.IRPCClient; import org.apache.bifromq.baserpc.client.exception.ServerNotFoundException; +import org.apache.bifromq.mqtt.inbox.rpc.proto.WriteReply; +import org.apache.bifromq.mqtt.inbox.rpc.proto.WriteRequest; import org.apache.bifromq.plugin.subbroker.DeliveryPack; import org.apache.bifromq.plugin.subbroker.DeliveryPackage; import org.apache.bifromq.plugin.subbroker.DeliveryReply; @@ -30,12 +36,6 @@ import org.apache.bifromq.plugin.subbroker.DeliveryResult; import 
org.apache.bifromq.plugin.subbroker.DeliveryResults; import org.apache.bifromq.plugin.subbroker.IDeliverer; -import java.util.HashSet; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import lombok.extern.slf4j.Slf4j; -import org.apache.bifromq.mqtt.inbox.rpc.proto.WriteReply; -import org.apache.bifromq.mqtt.inbox.rpc.proto.WriteRequest; import org.apache.bifromq.type.MatchInfo; @Slf4j @@ -57,14 +57,15 @@ public CompletableFuture deliver(DeliveryRequest request) { .exceptionally(unwrap(e -> { if (e instanceof ServerNotFoundException) { DeliveryReply.Builder replyBuilder = DeliveryReply.newBuilder().setCode(DeliveryReply.Code.OK); - Set allMatchInfos = new HashSet<>(); for (String tenantId : request.getPackageMap().keySet()) { + // collect match infos per tenant to avoid cross-tenant pollution + Set tenantMatchInfos = new HashSet<>(); DeliveryResults.Builder resultsBuilder = DeliveryResults.newBuilder(); DeliveryPackage deliveryPackage = request.getPackageMap().get(tenantId); for (DeliveryPack pack : deliveryPackage.getPackList()) { - allMatchInfos.addAll(pack.getMatchInfoList()); + tenantMatchInfos.addAll(pack.getMatchInfoList()); } - for (MatchInfo matchInfo : allMatchInfos) { + for (MatchInfo matchInfo : tenantMatchInfos) { resultsBuilder.addResult(DeliveryResult.newBuilder().setMatchInfo(matchInfo) .setCode(DeliveryResult.Code.NO_RECEIVER).build()); } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/AdaptiveReceiveQuota.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/AdaptiveReceiveQuota.java index f7656b403..964eeaf93 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/AdaptiveReceiveQuota.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/AdaptiveReceiveQuota.java @@ -21,119 +21,172 @@ import java.util.concurrent.TimeUnit; +/** + * Adaptive, congestion-aware receive quota 
controller for MQTT sessions. + *

+ * Estimates a proper in-flight window by tracking RTT with dual EWMAs, + * detecting congestion via latency ratio, applying multiplicative decrease + * with a short growth freeze on congestion, steering by utilization + * when latency is healthy, and clamping within a fixed floor and receive maximum. + */ final class AdaptiveReceiveQuota { - private final int receiveMaximum; - private final long rttFloorNanos; + private static final double EPS_LOW = 0.05; // healthy if r <= 1+EPS_LOW + private static final double EPS_HIGH = 0.15; // congested if r >= 1+EPS_HIGH + private static final double SHRINK_RATIO = 0.10; // multiplicative decrease factor + private static final double U_LOW = 0.05; // keep utilization above 5% + private static final double U_HIGH = 0.10; // and below 10% + private static final double U_TARGET = 0.075; // midpoint for steering when out of band + private static final double AMP_MIN = 1.10; // min grow amplification per window when util > U_HIGH + private static final double AMP_MAX = 2.00; // max grow amplification per window when util > U_HIGH + private static final double HEALTHY_SHRINK = 0.05; // shrink 5% per window when util < U_LOW + + private static final long EVAL_PERIOD_NANOS = TimeUnit.MILLISECONDS.toNanos(200); + private static final long SHRINK_COOLDOWN_NANOS = TimeUnit.MILLISECONDS.toNanos(300); + private static final long ERROR_FREEZE_NANOS = TimeUnit.MILLISECONDS.toNanos(300); + + private static final double BASE_FLOOR_ALPHA = 0.01; // only move floor upward + + private final int recvMin; // quota floor + private final int recvMax; // quota ceiling private final double emaAlpha; - private final double gain; - private final double slowAckFactor; - private final double minBandwidthPerNano; - private final int maxWindowStep; - - private double bandwidthEstimate; - private long minRttNanos; - private int windowEstimate; - private long lastAckTimeNanos; - private boolean slowAckPenalized; - - AdaptiveReceiveQuota(int receiveMaximum, 
- long rttFloorNanos, - double emaAlpha, - double gain, - double slowAckFactor, - double minBandwidthPerSecond, - int maxWindowStep) { - this.receiveMaximum = Math.max(1, receiveMaximum); - this.rttFloorNanos = Math.max(1L, rttFloorNanos); - this.emaAlpha = Math.min(1D, Math.max(0D, emaAlpha)); - this.gain = Math.max(0D, gain); - this.slowAckFactor = Math.max(1D, slowAckFactor); - this.maxWindowStep = maxWindowStep; - this.minBandwidthPerNano = Math.max(0D, minBandwidthPerSecond) / TimeUnit.SECONDS.toNanos(1); - this.bandwidthEstimate = Math.max(this.minBandwidthPerNano, 1D / this.rttFloorNanos); - long baseRtt = baselineRtt(); - this.windowEstimate = clampWindow((int) Math.ceil(this.bandwidthEstimate * baseRtt * this.gain), - this.receiveMaximum); + + private double fastLatencyEWMA = 0; + private double slowLatencyEWMA = 0; + private double baseFloorNanos = 0; + + private int quotaEstimate; + private int lastInflight = 0; + + private long lastEvalAtNanos = 0; + private long growthFreezeUntilNanos = 0; + + /** + * Construct a quota controller bounded by receiveMaximum and configured EWMA alpha. + * + * @param receiveMin lower bound for available quota + * @param receiveMax upper bound for available quota + * @param emaAlpha smoothing factor in [0, 1] for the fast EWMA + */ + AdaptiveReceiveQuota(int receiveMin, int receiveMax, double emaAlpha) { + assert receiveMin > 0 && receiveMin <= receiveMax; + this.recvMin = receiveMin; + this.recvMax = receiveMax; + this.emaAlpha = Math.min(1, Math.max(0, emaAlpha)); + this.quotaEstimate = recvMin; } - void onPacketAcked(long ackTimeNanos, long lastSendTimeNanos) { - if (ackTimeNanos <= 0) { - return; - } - long rtt = rttFloorNanos; - if (lastSendTimeNanos > 0 && ackTimeNanos > lastSendTimeNanos) { - rtt = ackTimeNanos - lastSendTimeNanos; + /** + * Observe ACK to update latency statistics and utilization, then trigger periodic evaluation. 
+ * + * @param ackTimeNanos time when ACK is received + * @param lastSendTimeNanos time when the acked packet was sent + * @param inflightCountAtAck in-flight count observed at ACK + */ + void onPacketAcked(long ackTimeNanos, long lastSendTimeNanos, int inflightCountAtAck) { + long rtt = ackTimeNanos - lastSendTimeNanos; + if (rtt > 0) { + // fast EWMA + fastLatencyEWMA = ewma(fastLatencyEWMA, rtt, emaAlpha); + // slow EWMA: up fast, down slow + if (slowLatencyEWMA == 0) { + slowLatencyEWMA = rtt; + } else { + double alphaSlowUp = Math.max(0.02, emaAlpha * 0.5); + double alphaSlowDown = Math.max(0.005, emaAlpha * 0.1); + double a = (fastLatencyEWMA > slowLatencyEWMA) ? alphaSlowUp : alphaSlowDown; + slowLatencyEWMA = ewma(slowLatencyEWMA, rtt, a); + } + // baseline floor moves up only + baseFloorNanos = baseFloorNanos == 0 ? rtt : decayFloor(baseFloorNanos, slowLatencyEWMA); } - if (minRttNanos == 0 || rtt < minRttNanos) { - minRttNanos = rtt; - } - long ackInterval; - if (lastAckTimeNanos > 0 && ackTimeNanos > lastAckTimeNanos) { - ackInterval = ackTimeNanos - lastAckTimeNanos; - } else { - ackInterval = rtt; + lastInflight = Math.max(0, inflightCountAtAck); + maybeEvaluate(ackTimeNanos); + } + + /** + * React to error by shrinking quota and freezing growth briefly. + * Consecutive shrinks are limited by cooldown. + * + * @param nowNanos current time + */ + void onErrorSignal(long nowNanos) { + if (nowNanos - lastEvalAtNanos < SHRINK_COOLDOWN_NANOS) { + return; } - double sampleBandwidth = 1D / (double) ackInterval; - if (bandwidthEstimate <= 0) { - bandwidthEstimate = sampleBandwidth; - } else { - bandwidthEstimate = (1 - emaAlpha) * bandwidthEstimate + emaAlpha * sampleBandwidth; + quotaEstimate = clampWindow((int) Math.ceil(quotaEstimate * (1D - Math.min(1D, SHRINK_RATIO)))); + growthFreezeUntilNanos = nowNanos + ERROR_FREEZE_NANOS; + lastEvalAtNanos = nowNanos; + } + + /** + * Get current available quota clamped within bounds. 
+ * + * @return non-negative available quota + */ + int availableQuota() { + return clampWindow(quotaEstimate); + } + + private void maybeEvaluate(long nowNanos) { + if (lastEvalAtNanos == 0) { + lastEvalAtNanos = nowNanos; + return; } - if (bandwidthEstimate < minBandwidthPerNano) { - bandwidthEstimate = minBandwidthPerNano; + while (nowNanos - lastEvalAtNanos >= EVAL_PERIOD_NANOS) { + long evalAt = lastEvalAtNanos + EVAL_PERIOD_NANOS; + evaluateOnce(evalAt); + lastEvalAtNanos = evalAt; } - lastAckTimeNanos = ackTimeNanos; - slowAckPenalized = false; - updateWindow(); } - int availableQuota(long nowNanos, int inFlightCount) { - if (windowEstimate > receiveMaximum) { - windowEstimate = receiveMaximum; - } - long baselineRtt = baselineRtt(); - long slowAckTimeout = (long) Math.max(baselineRtt * slowAckFactor, baselineRtt); - if (!slowAckPenalized - && lastAckTimeNanos > 0 - && nowNanos > lastAckTimeNanos - && nowNanos - lastAckTimeNanos > slowAckTimeout) { - windowEstimate = Math.max(1, windowEstimate / 2); - bandwidthEstimate = Math.max(minBandwidthPerNano, bandwidthEstimate / 2); - slowAckPenalized = true; + private void evaluateOnce(long evalAtNanos) { + if (evalAtNanos < growthFreezeUntilNanos) { + // in freeze window, skip adjustment + return; } - if (windowEstimate > receiveMaximum) { - windowEstimate = receiveMaximum; + // compute ratio r = fast / base + double baseCandidate = slowLatencyEWMA > 0 ? 
slowLatencyEWMA : (double) TimeUnit.MILLISECONDS.toNanos(1); + double base = Math.max(baseCandidate, baseFloorNanos); + if (base <= 0 || fastLatencyEWMA <= 0) { + return; } - if (windowEstimate < 1) { - windowEstimate = 1; + double r = fastLatencyEWMA / base; + if (r >= 1 + EPS_HIGH) { + // multiplicative decrease with cooldown + quotaEstimate = clampWindow((int) Math.ceil(quotaEstimate * (1 - SHRINK_RATIO))); + growthFreezeUntilNanos = evalAtNanos + SHRINK_COOLDOWN_NANOS; + } else if (r <= 1 + EPS_LOW) { + // healthy window: steer quota to keep utilization within [U_LOW, U_HIGH] + int w = Math.max(recvMin, quotaEstimate); + double util = (double) lastInflight / (double) w; + if (util > U_HIGH) { + double factor = Math.max(AMP_MIN, Math.min(AMP_MAX, util / U_TARGET)); + int newW = clampWindow((int) Math.ceil(w * factor)); + if (newW > w) { + quotaEstimate = newW; + } + } else if (util < U_LOW) { + int newW = clampWindow((int) Math.ceil(w * (1 - HEALTHY_SHRINK))); + if (newW < w) { + quotaEstimate = newW; + } + } } - return Math.max(0, windowEstimate - Math.max(0, inFlightCount)); + // keep } - int window() { - return windowEstimate; + private int clampWindow(int value) { + return Math.min(recvMax, Math.max(recvMin, value)); } - private void updateWindow() { - long baselineRtt = baselineRtt(); - double targetWindow = bandwidthEstimate * baselineRtt * gain; - int desired = clampWindow((int) Math.ceil(targetWindow), receiveMaximum); - if (desired > windowEstimate) { - windowEstimate = Math.min(desired, windowEstimate + maxWindowStep); - } else if (desired < windowEstimate) { - windowEstimate = Math.max(desired, windowEstimate - maxWindowStep); - } + private double ewma(double current, double sample, double alpha) { + return current == 0 ? 
sample : (1 - alpha) * current + alpha * sample; } - private int clampWindow(int value, int receiveMaximum) { - int max = Math.max(1, receiveMaximum); - if (value < 1) { - return 1; + private double decayFloor(double floor, double target) { + if (target > floor) { + return floor + BASE_FLOOR_ALPHA * (target - floor); } - return Math.min(max, value); - } - - private long baselineRtt() { - return minRttNanos > 0 ? minRttNanos : rttFloorNanos; + return floor; } } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/IMQTTProtocolHelper.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/IMQTTProtocolHelper.java index d606901a5..544277fa7 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/IMQTTProtocolHelper.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/IMQTTProtocolHelper.java @@ -50,7 +50,7 @@ public interface IMQTTProtocolHelper { Optional sessionExpiryIntervalOnDisconnect(MqttMessage disconnectMessage); - ProtocolResponse onDisconnect(); + ProtocolResponse onServerShuttingDown(); ProtocolResponse onResourceExhaustedDisconnect(TenantResourceType resourceType); diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTMessageHandler.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTMessageHandler.java index 8c3508361..6f9cc7ed0 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTMessageHandler.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTMessageHandler.java @@ -31,11 +31,10 @@ public abstract class MQTTMessageHandler extends ChannelDuplexHandler { private static final int DEFAULT_FLUSH_AFTER_FLUSHES = 128; private final int explicitFlushAfterFlushes; private final Runnable flushTask; + protected ChannelHandlerContext ctx; private int 
flushPendingCount; private Future nextScheduledFlush; - protected ChannelHandlerContext ctx; - protected MQTTMessageHandler() { this(DEFAULT_FLUSH_AFTER_FLUSHES); } @@ -88,6 +87,10 @@ public void channelWritabilityChanged(ChannelHandlerContext ctx) { ctx.fireChannelWritabilityChanged(); } + protected ChannelFuture write(Object msg) { + return ctx.write(msg); + } + protected ChannelFuture writeAndFlush(Object msg) { ChannelFuture future = ctx.write(msg); flush(false); diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTPersistentSessionHandler.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTPersistentSessionHandler.java index acec89935..a5d0e3b61 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTPersistentSessionHandler.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTPersistentSessionHandler.java @@ -90,7 +90,7 @@ public abstract class MQTTPersistentSessionHandler extends MQTTSessionHandler implements IMQTTPersistentSession { private final int sessionExpirySeconds; private final InboxVersion inboxVersion; - private final NavigableMap stagingBuffer = new TreeMap<>(); + private final NavigableMap stagingBuffer = new TreeMap<>(); private final IInboxClient inboxClient; private final Cache> qoS0TimestampsByMQTTPublisher = Caffeine.newBuilder() .expireAfterAccess(2 * DataPlaneMaxBurstLatencyMillis.INSTANCE.get(), TimeUnit.MILLISECONDS) @@ -125,6 +125,13 @@ protected MQTTPersistentSessionHandler(TenantSettings settings, this.sessionExpirySeconds = sessionExpirySeconds; } + @Override + protected void doOnServerShuttingDown() { + if (state == State.ATTACHED) { + state = State.SERVER_SHUTTING_DOWN; + } + } + @Override public void handlerAdded(ChannelHandlerContext ctx) { super.handlerAdded(ctx); @@ -143,8 +150,7 @@ public void handlerAdded(ChannelHandlerContext ctx) { } @Override - public void 
channelInactive(ChannelHandlerContext ctx) { - super.channelInactive(ctx); + public void doTearDown(ChannelHandlerContext ctx) { if (hintTimeout != null && !hintTimeout.isCancelled()) { hintTimeout.cancel(false); } @@ -152,12 +158,12 @@ public void channelInactive(ChannelHandlerContext ctx) { inboxReader.close(); } int remainInboxSize = - stagingBuffer.values().stream().reduce(0, (acc, msg) -> acc + msg.estBytes(), Integer::sum); + stagingBuffer.values().stream().reduce(0, (acc, msg) -> acc + msg.message.estBytes(), Integer::sum); if (remainInboxSize > 0) { memUsage.addAndGet(-remainInboxSize); } - if (state == State.ATTACHED) { - detach(DetachRequest.newBuilder() + switch (state) { + case ATTACHED -> detach(DetachRequest.newBuilder() .setReqId(System.nanoTime()) .setInboxId(userSessionId) .setVersion(inboxVersion) @@ -166,12 +172,26 @@ public void channelInactive(ChannelHandlerContext ctx) { .setClient(clientInfo) .setNow(HLC.INST.getPhysical()) .build()); + case SERVER_SHUTTING_DOWN -> detach(DetachRequest.newBuilder() + .setReqId(System.nanoTime()) + .setInboxId(userSessionId) + .setVersion(inboxVersion) + .setExpirySeconds(sessionExpirySeconds) + .setDiscardLWT(true) + .setClient(clientInfo) + .setNow(HLC.INST.getPhysical()) + .build()); + default -> { + // do not detach on other cases + } } - ctx.fireChannelInactive(); } @Override protected final ProtocolResponse handleDisconnect(MqttMessage message) { + if (state == State.SERVER_SHUTTING_DOWN) { + return ProtocolResponse.responseNothing(); + } int requestSEI = helper().sessionExpiryIntervalOnDisconnect(message).orElse(sessionExpirySeconds); int finalSEI = Integer.compareUnsigned(requestSEI, settings.maxSEI) < 0 ? 
requestSEI : settings.maxSEI; if (helper().isNormalDisconnect(message)) { @@ -214,7 +234,7 @@ protected final ProtocolResponse handleDisconnect(MqttMessage message) { } private void detach(DetachRequest request) { - if (state == State.DETACH) { + if (state != State.ATTACHED && state != State.SERVER_SHUTTING_DOWN) { return; } state = State.DETACH; @@ -431,15 +451,19 @@ private void confirmQoS0() { @Override protected final void onConfirm(long seq) { - RoutedMessage confirmed = stagingBuffer.remove(seq); - if (confirmed != null) { - // for multiple topic filters matched message, confirm to upstream when at lease one is confirmed by client + NavigableMap confirmedMsgs = stagingBuffer.headMap(seq, true); + for (StagingMessage stagingMessage : confirmedMsgs.values()) { + RoutedMessage confirmed = stagingMessage.message; memUsage.addAndGet(-confirmed.estBytes()); - if (inboxConfirmedUpToSeq < confirmed.inboxPos()) { + if (stagingMessage.batchEnd() && inboxConfirmedUpToSeq < confirmed.inboxPos()) { + // for multiple topic filters matched message, confirm to upstream when at lease one is confirmed by client inboxConfirmedUpToSeq = confirmed.inboxPos(); confirmSendBuffer(); } } + confirmedMsgs.clear(); + currentHint = clientReceiveQuota(); + inboxReader.hint(currentHint); ctx.executor().execute(this::drainStaging); } @@ -452,7 +476,7 @@ private void confirmSendBuffer() { } inboxConfirming = true; long upToSeq = inboxConfirmedUpToSeq; - addBgTask(inboxClient.commit(CommitRequest.newBuilder() + addFgTask(inboxClient.commit(CommitRequest.newBuilder() .setReqId(HLC.INST.get()) .setTenantId(clientInfo.getTenantId()) .setInboxId(userSessionId) @@ -507,21 +531,22 @@ private void consume(Fetched fetched) { // deal with qos0 if (fetched.getQos0MsgCount() > 0) { fetched.getQos0MsgList().forEach(this::pubQoS0Message); + flush(true); // commit immediately qos0ConfirmUpToSeq = fetched.getQos0Msg(fetched.getQos0MsgCount() - 1).getSeq(); confirmQoS0(); } // deal with buffered message if 
(fetched.getSendBufferMsgCount() > 0) { - fetched.getSendBufferMsgList().forEach(this::pubBufferedMessage); + for (int i = 0; i < fetched.getSendBufferMsgCount(); i++) { + InboxMessage inboxMessage = fetched.getSendBufferMsg(i); + this.pubBufferedMessage(inboxMessage, i + 1 == fetched.getSendBufferMsgCount()); + } } } case BACK_PRESSURE_REJECTED -> { - if (stagingBuffer.isEmpty()) { - currentHint = clientReceiveQuota(); - scheduleHintTimeout(); - inboxReader.hint(currentHint); - } + currentHint = clientReceiveQuota(); + scheduleHintTimeout(); } case TRY_LATER -> { currentHint = clientReceiveQuota(); @@ -558,7 +583,7 @@ private void pubQoS0Message(String topicFilter, TopicFilterOption option, TopicM }); } - private void pubBufferedMessage(InboxMessage inboxMsg) { + private void pubBufferedMessage(InboxMessage inboxMsg, boolean batchEnd) { boolean isDup = isDuplicateMessage(inboxMsg.getMsg().getPublisher(), inboxMsg.getMsg().getMessage(), qoS12TimestampsByMQTTPublisher); int i = 0; @@ -566,12 +591,17 @@ private void pubBufferedMessage(InboxMessage inboxMsg) { String topicFilter = entry.getKey(); TopicFilterOption option = entry.getValue(); long seq = inboxMsg.getSeq(); - pubBufferedMessage(topicFilter, option, seq + i++, seq, inboxMsg.getMsg(), isDup); + pubBufferedMessage(topicFilter, option, seq + i++, seq, inboxMsg.getMsg(), isDup, batchEnd); } } - private void pubBufferedMessage(String topicFilter, TopicFilterOption option, long seq, long inboxSeq, - TopicMessage topicMsg, boolean isDup) { + private void pubBufferedMessage(String topicFilter, + TopicFilterOption option, + long seq, + long inboxSeq, + TopicMessage topicMsg, + boolean isDup, + boolean batchEnd) { if (seq < nextSendSeq) { // do not buffer message that has been sent return; @@ -586,7 +616,7 @@ private void pubBufferedMessage(String topicFilter, TopicFilterOption option, lo checkResult.hasGranted(), isDup, inboxSeq); tenantMeter.timer(msg.qos() == AT_LEAST_ONCE ? 
MqttQoS1InternalLatency : MqttQoS2InternalLatency) .record(HLC.INST.getPhysical(now - message.getTimestamp()), TimeUnit.MILLISECONDS); - RoutedMessage prev = stagingBuffer.put(seq, msg); + StagingMessage prev = stagingBuffer.put(seq, new StagingMessage(msg, batchEnd)); if (prev == null) { memUsage.addAndGet(msg.estBytes()); } @@ -599,15 +629,15 @@ private void pubBufferedMessage(String topicFilter, TopicFilterOption option, lo } private void drainStaging() { - SortedMap toBeSent = stagingBuffer.tailMap(nextSendSeq); + SortedMap toBeSent = stagingBuffer.tailMap(nextSendSeq); if (toBeSent.isEmpty()) { return; } - Iterator> itr = toBeSent.entrySet().iterator(); + Iterator> itr = toBeSent.entrySet().iterator(); while (clientReceiveQuota() > 0 && itr.hasNext()) { - Map.Entry entry = itr.next(); + Map.Entry entry = itr.next(); long seq = entry.getKey(); - sendConfirmableSubMessage(seq, entry.getValue()); + sendConfirmableSubMessage(seq, entry.getValue().message); nextSendSeq = seq + 1; } flush(true); @@ -617,6 +647,11 @@ private enum State { INIT, ATTACHED, DETACH, + SERVER_SHUTTING_DOWN, TERMINATE } + + private record StagingMessage(RoutedMessage message, boolean batchEnd) { + + } } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTSessionHandler.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTSessionHandler.java index 82e9027c4..8f150b437 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTSessionHandler.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTSessionHandler.java @@ -19,9 +19,10 @@ package org.apache.bifromq.mqtt.handler; -import static java.lang.Math.round; import static java.util.concurrent.CompletableFuture.allOf; +import static org.apache.bifromq.metrics.TenantMetric.MqttConfirmingMessages; import static org.apache.bifromq.metrics.TenantMetric.MqttConnectCount; +import static 
org.apache.bifromq.metrics.TenantMetric.MqttDeDupBytes; import static org.apache.bifromq.metrics.TenantMetric.MqttDisconnectCount; import static org.apache.bifromq.metrics.TenantMetric.MqttIngressBytes; import static org.apache.bifromq.metrics.TenantMetric.MqttQoS0DistBytes; @@ -34,6 +35,8 @@ import static org.apache.bifromq.metrics.TenantMetric.MqttQoS2DistBytes; import static org.apache.bifromq.metrics.TenantMetric.MqttQoS2ExternalLatency; import static org.apache.bifromq.metrics.TenantMetric.MqttQoS2IngressBytes; +import static org.apache.bifromq.metrics.TenantMetric.MqttResendBytes; +import static org.apache.bifromq.metrics.TenantMetric.MqttSendingQuota; import static org.apache.bifromq.mqtt.handler.IMQTTProtocolHelper.SubResult.EXCEED_LIMIT; import static org.apache.bifromq.mqtt.handler.MQTTSessionIdUtil.userSessionId; import static org.apache.bifromq.mqtt.handler.v5.MQTT5MessageUtils.messageExpiryInterval; @@ -163,10 +166,7 @@ @Slf4j public abstract class MQTTSessionHandler extends MQTTMessageHandler implements IMQTTSession { protected static final boolean SANITY_CHECK = SanityCheckMqttUtf8String.INSTANCE.get(); - private static final long ACK_FLOOR_NANOS = TimeUnit.MICROSECONDS.toNanos(200); private static final double EMA_APLHA = 0.15; - private static final double GAIN = 1.75; - private static final double SLOW_ACK_FACTOR = 4; private static final int REDIRECT_CHECK_INTERVAL_SECONDS = ClientRedirectCheckIntervalSeconds.INSTANCE.get(); protected final TenantSettings settings; protected final String userSessionId; @@ -189,6 +189,7 @@ public abstract class MQTTSessionHandler extends MQTTMessageHandler implements I private final IMQTTMessageSizer sizer; private final LinkedHashMap unconfirmedPacketIds = new LinkedHashMap<>(); private final CompletableFuture onInitialized = new CompletableFuture<>(); + private final CompletableFuture tearDownSignal = new CompletableFuture<>(); private AdaptiveReceiveQuota receiveQuota; private LWT noDelayLWT; private boolean 
isGoAway; @@ -241,9 +242,19 @@ public final ClientInfo clientInfo() { } @Override - public final CompletableFuture disconnect() { - ctx.executor().execute(() -> handleProtocolResponse(helper().onDisconnect())); - return bgTasks.whenComplete((v, e) -> log.trace("All bg tasks finished: client={}", clientInfo)); + public final CompletableFuture onServerShuttingDown() { + ctx.executor().execute(() -> { + doOnServerShuttingDown(); + if (settings.noLWTWhenServerShuttingDown) { + discardLWT(); + } + handleProtocolResponse(helper().onServerShuttingDown()); + }); + return tearDownSignal; + } + + protected void doOnServerShuttingDown() { + } @Override @@ -358,13 +369,7 @@ public void handlerAdded(ChannelHandlerContext ctx) { ChannelAttrs.trafficShaper(ctx).setWriteLimit(settings.outboundBandwidth); ChannelAttrs.trafficShaper(ctx).setMaxWriteSize(settings.outboundBandwidth); ChannelAttrs.setMaxPayload(settings.maxPacketSize, ctx); - receiveQuota = new AdaptiveReceiveQuota(clientReceiveMaximum(), - ACK_FLOOR_NANOS, - EMA_APLHA, - GAIN, - SLOW_ACK_FACTOR, - settings.minSendPerSec, - clamp((int) round(clientReceiveMaximum() * 0.1), 4, 32)); + receiveQuota = new AdaptiveReceiveQuota(settings.minSendPerSec, clientReceiveMaximum(), EMA_APLHA); sessionCtx.localSessionRegistry.add(channelId(), this); sessionRegistration = ChannelAttrs.mqttSessionContext(ctx).sessionDictClient .reg(clientInfo, (killer, redirection) -> { @@ -385,15 +390,8 @@ public void handlerAdded(ChannelHandlerContext ctx) { onInitialized.whenComplete((v, e) -> tenantMeter.recordCount(MqttConnectCount)); } - private int clamp(int val, int min, int max) { - if (val < min) { - return min; - } - return Math.min(val, max); - } - @Override - public void channelInactive(ChannelHandlerContext ctx) { + public final void channelInactive(ChannelHandlerContext ctx) { if (idleTimeoutTask != null) { idleTimeoutTask.cancel(true); } @@ -407,6 +405,7 @@ public void channelInactive(ChannelHandlerContext ctx) { 
addBgTask(pubWillMessage(noDelayLWT)); } Sets.newHashSet(fgTasks).forEach(t -> t.cancel(true)); + doTearDown(ctx); sessionCtx.localSessionRegistry.remove(channelId(), this); sessionRegistration.stop(); tenantMeter.recordCount(MqttDisconnectCount); @@ -414,8 +413,15 @@ public void channelInactive(ChannelHandlerContext ctx) { isGoAway = true; eventCollector.report(getLocal(ByClient.class).withoutDisconnect(true).clientInfo(clientInfo)); } + bgTasks.whenComplete((v, e) -> { + log.trace("All bg tasks finished: client={}", clientInfo); + tearDownSignal.complete(null); + }); + ctx.fireChannelInactive(); } + protected abstract void doTearDown(ChannelHandlerContext ctx); + @Override public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { super.exceptionCaught(ctx, cause); @@ -832,7 +838,10 @@ private RoutedMessage getConfirming(int packetId) { protected final int clientReceiveQuota() { assert receiveQuota != null; - return receiveQuota.availableQuota(sessionCtx.nanoTime(), unconfirmedPacketIds.size()); + int quota = receiveQuota.availableQuota(); + tenantMeter.recordSummary(MqttSendingQuota, quota); + tenantMeter.recordSummary(MqttConfirmingMessages, unconfirmedPacketIds.size()); + return Math.max(0, quota - unconfirmedPacketIds.size()); } private RoutedMessage confirm(int packetId, boolean delivered) { @@ -858,12 +867,17 @@ private void confirm(ConfirmingMessage confirmingMsg, boolean delivered) { packetIdItr.remove(); confirmingMsg = head; long lastSentTimestamp = head.resendTimestamp > 0 ? 
head.resendTimestamp : head.timestamp; - receiveQuota.onPacketAcked(now, lastSentTimestamp); RoutedMessage confirmed = confirmingMsg.message; switch (confirmed.qos()) { case AT_LEAST_ONCE -> { - tenantMeter.timer(MqttQoS1ExternalLatency) - .record(now - confirmingMsg.timestamp, TimeUnit.NANOSECONDS); + // record external latency only when the message was actually sent + if (delivered && lastSentTimestamp > 0) { + // use inflight size before this ACK removal for proper AIMD increase + int inflightAtAck = unconfirmedPacketIds.size() + 1; + receiveQuota.onPacketAcked(now, lastSentTimestamp, inflightAtAck); + tenantMeter.timer(MqttQoS1ExternalLatency) + .record(now - lastSentTimestamp, TimeUnit.NANOSECONDS); + } if (settings.debugMode) { eventCollector.report(getLocal(QoS1Confirmed.class) .reqId(confirmed.message().getMessageId()) @@ -878,8 +892,13 @@ private void confirm(ConfirmingMessage confirmingMsg, boolean delivered) { } } case EXACTLY_ONCE -> { - tenantMeter.timer(MqttQoS2ExternalLatency) - .record(now - confirmingMsg.timestamp, TimeUnit.NANOSECONDS); + // record external latency only when the message was actually sent + if (delivered && lastSentTimestamp > 0) { + int inflightAtAck = unconfirmedPacketIds.size() + 1; + receiveQuota.onPacketAcked(now, lastSentTimestamp, inflightAtAck); + tenantMeter.timer(MqttQoS2ExternalLatency) + .record(now - lastSentTimestamp, TimeUnit.NANOSECONDS); + } if (!delivered && settings.debugMode) { eventCollector.report(getLocal(QoS2Confirmed.class) .reqId(confirmed.message().getMessageId()) @@ -932,6 +951,7 @@ protected final void sendQoS0SubMessage(RoutedMessage msg) { return; } if (msg.isDup()) { + tenantMeter.recordSummary(MqttDeDupBytes, msgSize); eventCollector.report(getLocal(QoS0Dropped.class) .reason(DropReason.Duplicated) .isRetain(msg.isRetain()) @@ -1006,7 +1026,7 @@ protected final void sendQoS0SubMessage(RoutedMessage msg) { return; } memUsage.addAndGet(msgSize); - writeAndFlush(pubMsg).addListener(f -> { + 
write(pubMsg).addListener(f -> { memUsage.addAndGet(-msgSize); if (f.isSuccess()) { lastActiveAtNanos = sessionCtx.nanoTime(); @@ -1063,6 +1083,7 @@ private void writeConfirmableSubMessage(ConfirmingMessage confirmingMsg, boolean return; } if (msg.isDup()) { + tenantMeter.recordSummary(MqttDeDupBytes, msgSize); reportDropConfirmableMsgEvent(msg, DropReason.Duplicated); ctx.executor().execute(() -> confirm(packetId, false)); return; @@ -1112,6 +1133,7 @@ private void writeConfirmableSubMessage(ConfirmingMessage confirmingMsg, boolean return; } if (!ctx.channel().isWritable()) { + receiveQuota.onErrorSignal(sessionCtx.nanoTime()); if (resendTask != null) { // will retry on next resend schedule resendTask.cancel(true); @@ -1123,9 +1145,10 @@ private void writeConfirmableSubMessage(ConfirmingMessage confirmingMsg, boolean confirmingMsg.timestamp = sessionCtx.nanoTime(); } else { confirmingMsg.resendTimestamp = sessionCtx.nanoTime(); + tenantMeter.recordSummary(MqttResendBytes, msgSize); } confirmingMsg.sentCount++; - writeAndFlush(pubMsg).addListener(f -> { + write(pubMsg).addListener(f -> { memUsage.addAndGet(-msgSize); if (f.isSuccess()) { if (settings.debugMode) { @@ -1156,6 +1179,7 @@ private void writeConfirmableSubMessage(ConfirmingMessage confirmingMsg, boolean } } } else { + receiveQuota.onErrorSignal(sessionCtx.nanoTime()); if (settings.debugMode) { String detail = f.cause() == null ? 
"unknown" : f.cause().getMessage(); switch (msg.qos()) { @@ -1218,27 +1242,35 @@ private void scheduleResend() { private void resend() { long now = sessionCtx.nanoTime(); + boolean flush = false; for (ConfirmingMessage confirmingMsg : unconfirmedPacketIds.values()) { if (confirmingMsg.sentCount <= settings.maxResendTimes) { if (ctx.channel().isWritable()) { if (confirmingMsg.sentCount == 0) { // first time send immediately writeConfirmableSubMessage(confirmingMsg, false); + flush = true; } else { long lastSendTs = Math.max(confirmingMsg.timestamp, confirmingMsg.resendTimestamp); if (Duration.ofNanos(now - lastSendTs).toSeconds() >= settings.resendTimeoutSeconds) { // only send after resend timeout writeConfirmableSubMessage(confirmingMsg, true); + flush = true; } } } else { + receiveQuota.onErrorSignal(now); break; } } else { reportDropConfirmableMsgEvent(confirmingMsg.message, DropReason.MaxRetried); confirm(confirmingMsg, false); + receiveQuota.onErrorSignal(now); } } + if (flush) { + flush(true); + } if (!unconfirmedPacketIds.isEmpty()) { scheduleResend(); } @@ -1566,7 +1598,7 @@ private CompletableFuture doPubLastWill(LWT willMessage) { .build(); long reqId = sessionCtx.nanoTime(); int size = message.getPayload().size() + willMessage.getTopic().length(); - return doPub(reqId, willMessage.getTopic(), message, true, true) + return doPub(reqId, willMessage.getTopic(), message, true) .handle((v, e) -> { assert ctx.executor().inEventLoop(); if (e != null) { @@ -1611,7 +1643,7 @@ private CompletableFuture doPub(long reqId, Message message, boolean isDup, int ingressMsgSize) { - return doPub(reqId, topic, message, false, false) + return doPub(reqId, topic, message, false) .thenApply(v -> { assert ctx.executor().inEventLoop(); switch (v) { @@ -1661,24 +1693,20 @@ private CompletableFuture doPub(long reqId, }); } - private CompletableFuture doPub(long reqId, - String topic, - Message message, - boolean isLWT, - boolean background) { + private CompletableFuture 
doPub(long reqId, String topic, Message message, boolean isLWT) { if (log.isTraceEnabled()) { log.trace("Disting msg: req={}, topic={}, qos={}, size={}", reqId, topic, message.getPubQoS(), message.getPayload().size()); } CompletableFuture distTask = - trackTask(sessionCtx.distClient.pub(reqId, topic, message, clientInfo), background); + trackTask(sessionCtx.distClient.pub(reqId, topic, message, clientInfo), isLWT); if (!message.getIsRetain()) { // Ensure continuation runs on the channel event loop return distTask.thenApplyAsync(v -> v, ctx.executor()); } else { CompletableFuture retainTask = - trackTask(retainMessage(reqId, topic, message, isLWT), background); + trackTask(retainMessage(reqId, topic, message, isLWT), isLWT); return allOf(retainTask, distTask).thenApplyAsync(v -> distTask.join(), ctx.executor()); } } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTTransientSessionHandler.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTTransientSessionHandler.java index 2c16dc8c2..9362f074b 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTTransientSessionHandler.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/MQTTTransientSessionHandler.java @@ -127,8 +127,7 @@ public void handlerAdded(ChannelHandlerContext ctx) { } @Override - public void channelInactive(ChannelHandlerContext ctx) { - super.channelInactive(ctx); + public void doTearDown(ChannelHandlerContext ctx) { if (!topicFilters.isEmpty()) { topicFilters.forEach((topicFilter, option) -> addBgTask(unsubTopicFilter(System.nanoTime(), topicFilter))); } @@ -158,7 +157,6 @@ public void channelInactive(ChannelHandlerContext ctx) { } // Transient session lifetime is bounded by the channel lifetime eventCollector.report(getLocal(MQTTSessionStop.class).sessionId(userSessionId).clientInfo(clientInfo)); - ctx.fireChannelInactive(); } @Override @@ 
-175,12 +173,12 @@ protected ProtocolResponse handleDisconnect(MqttMessage message) { @Override protected final void onConfirm(long seq) { - java.util.NavigableMap confirmed = inbox.headMap(seq, true); + NavigableMap confirmed = inbox.headMap(seq, true); for (RoutedMessage msg : confirmed.values()) { memUsage.addAndGet(-msg.estBytes()); } confirmed.clear(); - send(); + send(false); } @Override @@ -377,6 +375,7 @@ private void publish(String topic, List topicFilterAndPermissions) { AtomicInteger totalMsgBytesSize = new AtomicInteger(); long now = HLC.INST.get(); + boolean flush = false; for (Message message : messages) { // deduplicate messages based on topic and publisher for (TopicFilterAndPermission tfp : topicFilterAndPermissions) { @@ -386,6 +385,7 @@ private void publish(String topic, logInternalLatency(subMsg); if (subMsg.qos() == QoS.AT_MOST_ONCE) { sendQoS0SubMessage(subMsg); + flush = true; } else { if (inbox.size() < settings.inboxQueueLength) { inbox.put(msgSeqNo++, subMsg); @@ -419,12 +419,15 @@ private void publish(String topic, } } memUsage.addAndGet(totalMsgBytesSize.get()); - send(); + send(flush); } - private void send() { + private void send(boolean flushNeeded) { SortedMap toBeSent = inbox.tailMap(nextSendSeq); if (toBeSent.isEmpty()) { + if (flushNeeded) { + flush(true); + } return; } Iterator> itr = toBeSent.entrySet().iterator(); @@ -435,6 +438,7 @@ private void send() { sendConfirmableSubMessage(seq, msg); nextSendSeq = seq + 1; } + flush(true); } private void logInternalLatency(RoutedMessage message) { diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/RoutedMessage.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/RoutedMessage.java index 2328c9f9e..f5df6f112 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/RoutedMessage.java +++ 
b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/RoutedMessage.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.mqtt.handler; diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/TenantSettings.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/TenantSettings.java index a222d470d..3bc99835f 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/TenantSettings.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/TenantSettings.java @@ -40,6 +40,7 @@ import static org.apache.bifromq.plugin.settingprovider.Setting.MinSendPerSec; import static org.apache.bifromq.plugin.settingprovider.Setting.MinSessionExpirySeconds; import static org.apache.bifromq.plugin.settingprovider.Setting.MsgPubPerSec; +import static org.apache.bifromq.plugin.settingprovider.Setting.NoLWTWhenServerShuttingDown; import static org.apache.bifromq.plugin.settingprovider.Setting.OutBoundBandWidth; import static org.apache.bifromq.plugin.settingprovider.Setting.PayloadFormatValidationEnabled; import static org.apache.bifromq.plugin.settingprovider.Setting.QoS0DropOldest; @@ -60,6 +61,7 @@ public class TenantSettings { public final boolean mqtt4Enabled; public final boolean mqtt5Enabled; public final boolean debugMode; + public final boolean noLWTWhenServerShuttingDown; public final boolean forceTransient; public final boolean payloadFormatValidationEnabled; public final boolean retainEnabled; @@ -79,7 +81,7 @@ public class TenantSettings { public final long inboundBandwidth; public final long outboundBandwidth; public final int receiveMaximum; - public final double minSendPerSec; + public final int 
minSendPerSec; public final int maxMsgPerSec; public final int maxResendTimes; public final int resendTimeoutSeconds; @@ -94,6 +96,7 @@ public TenantSettings(String tenantId, ISettingProvider provider) { mqtt4Enabled = provider.provide(MQTT4Enabled, tenantId); mqtt5Enabled = provider.provide(MQTT5Enabled, tenantId); debugMode = provider.provide(DebugModeEnabled, tenantId); + noLWTWhenServerShuttingDown = provider.provide(NoLWTWhenServerShuttingDown, tenantId); forceTransient = provider.provide(ForceTransient, tenantId); payloadFormatValidationEnabled = provider.provide(PayloadFormatValidationEnabled, tenantId); retainEnabled = provider.provide(RetainEnabled, tenantId); diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v3/MQTT3PersistentSessionHandler.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v3/MQTT3PersistentSessionHandler.java index 94cda3985..684248a64 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v3/MQTT3PersistentSessionHandler.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v3/MQTT3PersistentSessionHandler.java @@ -69,8 +69,8 @@ public void handlerAdded(ChannelHandlerContext ctx) { } @Override - public void channelInactive(ChannelHandlerContext ctx) { - super.channelInactive(ctx); + public void doTearDown(ChannelHandlerContext ctx) { + super.doTearDown(ctx); memUsage.addAndGet(-estBaseMemSize()); } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v3/MQTT3ProtocolHelper.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v3/MQTT3ProtocolHelper.java index 815859c07..f9f36d990 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v3/MQTT3ProtocolHelper.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v3/MQTT3ProtocolHelper.java @@ -136,7 
+136,7 @@ public boolean isDisconnectWithLWT(MqttMessage message) { } @Override - public ProtocolResponse onDisconnect() { + public ProtocolResponse onServerShuttingDown() { return goAwayNow((getLocal(ByServer.class).clientInfo(clientInfo))); } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v3/MQTT3TransientSessionHandler.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v3/MQTT3TransientSessionHandler.java index bbcf1a891..c21c21e3d 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v3/MQTT3TransientSessionHandler.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v3/MQTT3TransientSessionHandler.java @@ -57,8 +57,8 @@ public void handlerAdded(ChannelHandlerContext ctx) { } @Override - public void channelInactive(ChannelHandlerContext ctx) { - super.channelInactive(ctx); + public void doTearDown(ChannelHandlerContext ctx) { + super.doTearDown(ctx); memUsage.addAndGet(-estBaseMemSize()); } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5ConnectHandler.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5ConnectHandler.java index f1adf627f..5d3934ba8 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5ConnectHandler.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5ConnectHandler.java @@ -48,6 +48,7 @@ import static org.apache.bifromq.mqtt.handler.v5.MQTT5MessageUtils.isUTF8Payload; import static org.apache.bifromq.mqtt.handler.v5.MQTT5MessageUtils.maximumPacketSize; import static org.apache.bifromq.mqtt.handler.v5.MQTT5MessageUtils.mqttProps; +import static org.apache.bifromq.mqtt.handler.v5.MQTT5MessageUtils.receiveMaximum; import static org.apache.bifromq.mqtt.handler.v5.MQTT5MessageUtils.requestProblemInformation; 
import static org.apache.bifromq.mqtt.handler.v5.MQTT5MessageUtils.requestResponseInformation; import static org.apache.bifromq.mqtt.handler.v5.MQTT5MessageUtils.toUserProperties; @@ -217,6 +218,18 @@ && authData(connMsg.variableHeader().properties()).isPresent()) { getLocal(MalformedWillTopic.class).peerAddress(clientAddress)); } } + // Validate Receive Maximum property per MQTT v5.0 [3.1.2.11.3] + Optional receiveMaxProps = receiveMaximum(connMsg.variableHeader().properties()); + if (receiveMaxProps.isPresent() && receiveMaxProps.get() == 0) { + return new GoAway(MqttMessageBuilders + .connAck() + .returnCode(CONNECTION_REFUSED_PROTOCOL_ERROR) + .properties(MQTT5MessageBuilders.connAckProperties() + .reasonString("MQTT5-3.1.2.11.3") + .build()) + .build(), + getLocal(ProtocolError.class).statement("MQTT5-3.1.2.11.3")); + } return null; } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5MessageUtils.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5MessageUtils.java index acdbad20b..5e9d9da96 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5MessageUtils.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5MessageUtils.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.mqtt.handler.v5; diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5PersistentSessionHandler.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5PersistentSessionHandler.java index 34c77d33f..b050a725a 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5PersistentSessionHandler.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5PersistentSessionHandler.java @@ -86,8 +86,8 @@ public void handlerAdded(ChannelHandlerContext ctx) { } @Override - public void channelInactive(ChannelHandlerContext ctx) { - super.channelInactive(ctx); + public void doTearDown(ChannelHandlerContext ctx) { + super.doTearDown(ctx); memUsage.addAndGet(-estBaseMemSize()); } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5ProtocolHelper.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5ProtocolHelper.java index 71fcfdfde..ee68dad27 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5ProtocolHelper.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5ProtocolHelper.java @@ -126,7 +126,8 @@ public MQTT5ProtocolHelper(MqttConnectMessage connMsg, this.senderTopicAliasManager = new SenderTopicAliasManager(topicAliasMaximum(connMsg.variableHeader().properties()).orElse(0), Duration.ofSeconds(60)); - this.clientReceiveMaximum = receiveMaximum(connMsg.variableHeader().properties()).orElse(65535); + this.clientReceiveMaximum = Math.max(settings.minSendPerSec, + receiveMaximum(connMsg.variableHeader().properties()).orElse(65535)); this.requestProblemInfo = requestProblemInformation(connMsg.variableHeader().properties()); } @@ -175,7 +176,7 @@ public Optional sessionExpiryIntervalOnDisconnect(MqttMessage 
disconnec } @Override - public ProtocolResponse onDisconnect() { + public ProtocolResponse onServerShuttingDown() { return farewellNow( MQTT5MessageBuilders.disconnect().reasonCode(MQTT5DisconnectReasonCode.ServerShuttingDown).build(), getLocal(ByServer.class).clientInfo(clientInfo)); diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5TransientSessionHandler.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5TransientSessionHandler.java index 375f73719..b0c473c62 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5TransientSessionHandler.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/handler/v5/MQTT5TransientSessionHandler.java @@ -74,8 +74,8 @@ public void handlerAdded(ChannelHandlerContext ctx) { } @Override - public void channelInactive(ChannelHandlerContext ctx) { - super.channelInactive(ctx); + public void doTearDown(ChannelHandlerContext ctx) { + super.doTearDown(ctx); memUsage.addAndGet(-estBaseMemSize()); } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/service/LocalSessionRegistry.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/service/LocalSessionRegistry.java index 1e73da34e..b81fcc4dd 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/service/LocalSessionRegistry.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/service/LocalSessionRegistry.java @@ -14,12 +14,11 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.mqtt.service; -import org.apache.bifromq.mqtt.session.IMQTTSession; import com.google.common.util.concurrent.RateLimiter; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Metrics; @@ -28,6 +27,7 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import org.apache.bifromq.mqtt.session.IMQTTSession; public class LocalSessionRegistry implements ILocalSessionRegistry { private final ConcurrentMap sessionMap = new ConcurrentHashMap<>(); @@ -67,7 +67,7 @@ public CompletableFuture disconnectAll(int disconnectRate) { private CompletableFuture disconnect(String sessionId) { IMQTTSession session = sessionMap.remove(sessionId); if (session != null) { - return session.disconnect(); + return session.onServerShuttingDown(); } return CompletableFuture.completedFuture(null); } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/session/IMQTTSession.java b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/session/IMQTTSession.java index b85b3ef4c..ddd9f438c 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/session/IMQTTSession.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/main/java/org/apache/bifromq/mqtt/session/IMQTTSession.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.mqtt.session; @@ -34,5 +34,5 @@ public interface IMQTTSession { CompletableFuture unsubscribe(long reqId, String topicFilter); - CompletableFuture disconnect(); + CompletableFuture onServerShuttingDown(); } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/AdaptiveReceiveQuotaTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/AdaptiveReceiveQuotaTest.java index 0ffb1daff..9e61c084d 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/AdaptiveReceiveQuotaTest.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/AdaptiveReceiveQuotaTest.java @@ -19,82 +19,199 @@ package org.apache.bifromq.mqtt.handler; +import static java.util.concurrent.TimeUnit.MILLISECONDS; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; -import java.util.concurrent.TimeUnit; import org.testng.annotations.Test; public class AdaptiveReceiveQuotaTest { + private static final long MS = MILLISECONDS.toNanos(1); + private static final long EVAL = MILLISECONDS.toNanos(200); + private static final long FREEZE = MILLISECONDS.toNanos(300); + private static final double EMA_ALPHA = 0.15d; + + private void ack(AdaptiveReceiveQuota q, long now, long rtt, int inflight) { + q.onPacketAcked(now, now - rtt, inflight); + } + @Test - public void shouldStabilizeWindowWhenAckEqualsRtt() { - AdaptiveReceiveQuota quota = new AdaptiveReceiveQuota( - 100, - TimeUnit.MILLISECONDS.toNanos(5), - 0.5, - 1.25, - 4.0, - 20.0, - 100); - - long ackInterval = TimeUnit.MILLISECONDS.toNanos(5); - long now = 0; - for (int i = 0; i < 20; i++) { - long sendTime = now; - now += ackInterval; - quota.onPacketAcked(now, sendTime); + public void testInitialQuotaEqualsReceiveMin() { + AdaptiveReceiveQuota q = new AdaptiveReceiveQuota(8, 64, EMA_ALPHA); + assertEquals(q.availableQuota(), 8); + } + + @Test + public 
void testNoAdjustmentBeforeFirstEval() { + AdaptiveReceiveQuota q = new AdaptiveReceiveQuota(8, 64, EMA_ALPHA); + long t = 1_000_000_000L; + ack(q, t, MS, 0); + int w0 = q.availableQuota(); + // less than one eval period + t += EVAL / 2; + ack(q, t, MS, 0); + assertEquals(q.availableQuota(), w0); + } + + @Test + public void testHealthyHighUtilizationGrowsUpToReceiveMax() { + AdaptiveReceiveQuota q = new AdaptiveReceiveQuota(8, 64, EMA_ALPHA); + long t = 2_000_000_000L; + ack(q, t, MS, 0); + for (int i = 0; i < 10; i++) { + int w = q.availableQuota(); + t += EVAL; + ack(q, t, MS, w); + if (q.availableQuota() == 64) { + break; + } } - int expectedWindow = (int) Math.ceil(1.25); - assertEquals(quota.window(), expectedWindow); - assertTrue(quota.window() <= 100); + assertEquals(q.availableQuota(), 64); } @Test - public void shouldShrinkWindowWhenAckSlow() { - AdaptiveReceiveQuota quota = new AdaptiveReceiveQuota( - 100, - TimeUnit.MILLISECONDS.toNanos(5), - 0.5, - 1.25, - 4.0, - 20.0, - 100); - - long ackInterval = TimeUnit.MILLISECONDS.toNanos(5); - long now = 0; - for (int i = 0; i < 5; i++) { - long sendTime = now; - now += ackInterval; - quota.onPacketAcked(now, sendTime); + public void testHealthyLowUtilizationShrinkRespectsReceiveMin() { + AdaptiveReceiveQuota q = new AdaptiveReceiveQuota(8, 64, EMA_ALPHA); + long t = 3_000_000_000L; + ack(q, t, MS, 0); + while (q.availableQuota() < 32) { + int w = q.availableQuota(); + t += EVAL; + ack(q, t, MS, w); } - int beforeSlowAck = quota.window(); + int before = q.availableQuota(); + // low utilization triggers ~5% shrink (ceil) + t += EVAL; + ack(q, t, MS, 0); + int after = q.availableQuota(); + assertTrue(after < before); + assertTrue(after >= 8); + } - long slowTimeout = (long) (ackInterval * 4.0); - quota.availableQuota(now + slowTimeout + 1, 0); + @Test + public void testCongestionShrinkAndFreeze() { + AdaptiveReceiveQuota q = new AdaptiveReceiveQuota(8, 64, EMA_ALPHA); + long t = 4_000_000_000L; + ack(q, t, MS, 0); 
+ t += EVAL; + ack(q, t, MS, q.availableQuota()); + int before = q.availableQuota(); + // trigger congestion with a higher RTT to exceed ratio threshold + t += EVAL; + ack(q, t, 5 * MS, before); // large RTT to push fast EWMA up + int shrunk = q.availableQuota(); + assertTrue(shrunk <= before); + // inside freeze: healthy high-utilization should not grow + t += EVAL; + ack(q, t, MS, shrunk); + assertEquals(q.availableQuota(), shrunk); + // after freeze: allow growth again + t += EVAL * 2; // ensure beyond freeze window + ack(q, t, MS, q.availableQuota()); + assertTrue(q.availableQuota() >= shrunk); + } - assertTrue(quota.window() < beforeSlowAck); + @Test + public void testErrorSignalCooldownAndFreeze() { + AdaptiveReceiveQuota q = new AdaptiveReceiveQuota(8, 64, EMA_ALPHA); + long t = 5_000_000_000L; + // ensure lastEvalAt large enough for cooldown check not to early-return + q.onErrorSignal(t); + int w1 = q.availableQuota(); + // within cooldown: no further shrink + q.onErrorSignal(t + FREEZE / 2); + int w2 = q.availableQuota(); + assertEquals(w2, w1); + // after cooldown: another shrink + q.onErrorSignal(t + FREEZE + MILLISECONDS.toNanos(50)); + int w3 = q.availableQuota(); + assertTrue(w3 <= w2); } @Test - public void shouldRespectReceiveMaximumAndInFlight() { - AdaptiveReceiveQuota quota = new AdaptiveReceiveQuota( - 10, - TimeUnit.MILLISECONDS.toNanos(5), - 0.5, - 1.25, - 4.0, - 50.0, - 10); - - long ackInterval = TimeUnit.MILLISECONDS.toNanos(5); - long now = 0; - for (int i = 0; i < 20; i++) { - long sendTime = now; - now += ackInterval; - quota.onPacketAcked(now, sendTime); - } + public void testCatchUpAcrossMultipleEvalPeriods() { + AdaptiveReceiveQuota q = new AdaptiveReceiveQuota(8, 64, EMA_ALPHA); + long t = 6_000_000_000L; + ack(q, t, MS, 0); + // jump forward 3 periods with high utilization + t += EVAL * 3; + ack(q, t, MS, q.availableQuota()); + assertTrue(q.availableQuota() >= 16); + } + + @Test + public void testNegativeRTTSampleIgnored() { + 
AdaptiveReceiveQuota q = new AdaptiveReceiveQuota(8, 64, EMA_ALPHA); + long t = 7_000_000_000L; + ack(q, t, MS, 0); + int before = q.availableQuota(); + // negative RTT sample + long rtt = -MS; + t += EVAL; + q.onPacketAcked(t, t - rtt, 0); + assertEquals(q.availableQuota(), before); + } + + @Test + public void testReceiveMinEqualsReceiveMaxProducesFixedWindow() { + AdaptiveReceiveQuota q = new AdaptiveReceiveQuota(16, 16, EMA_ALPHA); + long t = 8_000_000_000L; + ack(q, t, MS, 0); + assertEquals(q.availableQuota(), 16); + // attempt to grow + t += EVAL; + ack(q, t, MS, 16); + assertEquals(q.availableQuota(), 16); + // attempt to shrink by congestion + t += EVAL; + ack(q, t, 5 * MS, 16); + assertEquals(q.availableQuota(), 16); + } + + @Test + public void testReceiveMinEqualsReceiveMaxWithErrorSignalAndFreeze() { + AdaptiveReceiveQuota q = new AdaptiveReceiveQuota(16, 16, EMA_ALPHA); + long t = 8_500_000_000L; + // initialize clock + ack(q, t, MS, 0); + assertEquals(q.availableQuota(), 16); + // trigger error shrink after cooldown; still clamped at 16 + long tErr = t + FREEZE + MILLISECONDS.toNanos(1); + q.onErrorSignal(tErr); + assertEquals(q.availableQuota(), 16); + // within cooldown: no further shrink (still clamped) + q.onErrorSignal(tErr + FREEZE / 2); + assertEquals(q.availableQuota(), 16); + // after cooldown: another error signal; remains clamped + q.onErrorSignal(tErr + FREEZE + MILLISECONDS.toNanos(1)); + assertEquals(q.availableQuota(), 16); + // healthy high utilization during freeze should also not grow + long tAfter = tErr + EVAL; + ack(q, tAfter, MS, 16); + assertEquals(q.availableQuota(), 16); + } - assertTrue(quota.window() <= 10); - assertEquals(quota.availableQuota(now, 10), 0); + @Test + public void testUtilizationBandSteeringBoundaries() { + AdaptiveReceiveQuota q = new AdaptiveReceiveQuota(20, 100, EMA_ALPHA); + long t = 9_000_000_000L; + ack(q, t, MS, 0); + assertEquals(q.availableQuota(), 20); + // high utilization -> grow + t += EVAL; + 
ack(q, t, MS, 20); + int w1 = q.availableQuota(); + assertTrue(w1 > 20); + // low utilization -> shrink + t += EVAL; + ack(q, t, MS, 0); + int w2 = q.availableQuota(); + assertTrue(w2 < w1); + assertTrue(w2 >= 20); + // mid utilization in band -> steady + t += EVAL; + ack(q, t, MS, Math.max(1, (int) Math.round(w2 * 0.07))); + int w3 = q.availableQuota(); + assertEquals(w3, w2); } } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v3/MQTT3PersistentSessionHandlerTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v3/MQTT3PersistentSessionHandlerTest.java index eebbe4156..4fe843642 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v3/MQTT3PersistentSessionHandlerTest.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v3/MQTT3PersistentSessionHandlerTest.java @@ -334,7 +334,7 @@ public void qoS1PubAndAck() { channel.writeInbound(MQTTMessageUtils.pubAckMessage(message.variableHeader().packetId())); } verifyEventUnordered(QOS1_PUSHED, QOS1_PUSHED, QOS1_PUSHED, QOS1_CONFIRMED, QOS1_CONFIRMED, QOS1_CONFIRMED); - verify(inboxClient, times(3)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); + verify(inboxClient, times(1)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); } @Test @@ -432,7 +432,7 @@ public void qoS1PubAndNotAllAck() { } } verifyEventUnordered(QOS1_PUSHED, QOS1_PUSHED, QOS1_PUSHED, QOS1_CONFIRMED, QOS1_CONFIRMED); - verify(inboxClient, times(2)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); + verify(inboxClient, times(0)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); } @Test @@ -484,7 +484,7 @@ public void qoS2PubAndRel() { ((MqttMessageIdVariableHeader) message.variableHeader()).messageId())); } verifyEvent(QOS2_PUSHED, QOS2_PUSHED, QOS2_RECEIVED, QOS2_RECEIVED, QOS2_CONFIRMED, QOS2_CONFIRMED); - verify(inboxClient, 
times(2)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); + verify(inboxClient, times(1)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); } @Test diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v3/MQTT3TransientSessionHandlerTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v3/MQTT3TransientSessionHandlerTest.java index dccde7b1a..6efa2b125 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v3/MQTT3TransientSessionHandlerTest.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v3/MQTT3TransientSessionHandlerTest.java @@ -70,6 +70,7 @@ import static org.apache.bifromq.type.MQTTClientInfoConstants.MQTT_TYPE_VALUE; import static org.apache.bifromq.type.MQTTClientInfoConstants.MQTT_USER_ID_KEY; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyDouble; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.ArgumentMatchers.anyString; @@ -120,6 +121,7 @@ import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.dist.client.PubResult; +import org.apache.bifromq.metrics.TenantMetric; import org.apache.bifromq.mqtt.handler.BaseSessionHandlerTest; import org.apache.bifromq.mqtt.handler.ChannelAttrs; import org.apache.bifromq.mqtt.handler.TenantSettings; @@ -129,6 +131,7 @@ import org.apache.bifromq.plugin.authprovider.type.Denied; import org.apache.bifromq.plugin.authprovider.type.Granted; import org.apache.bifromq.plugin.authprovider.type.MQTTAction; +import org.apache.bifromq.plugin.eventcollector.Event; import org.apache.bifromq.plugin.eventcollector.EventType; import org.apache.bifromq.plugin.eventcollector.mqttbroker.pushhandling.DropReason; import org.apache.bifromq.plugin.eventcollector.mqttbroker.pushhandling.QoS0Dropped; @@ -734,12 
+737,16 @@ public void qos0PubExceedBufferCapacity() { ArgumentCaptor longCaptor = ArgumentCaptor.forClass(Long.class); verify(localDistService).match(anyLong(), eq(topicFilter), longCaptor.capture(), any()); - channel.writeOneOutbound(MQTTMessageUtils.largeMqttMessage(300 * 1024)); + channel.writeOneOutbound(MQTTMessageUtils.largeMqttMessage(300 * 1024)); + assertFalse(channel.isWritable()); // ensure overflow path List payloads = s2cMessagesPayload(1, 32 * 1024); transientSessionHandler.publish(s2cMessageList(topic, payloads, QoS.AT_MOST_ONCE), Collections.singleton(new IMQTTTransientSession.MatchedTopicFilter(topicFilter, longCaptor.getValue()))); channel.runPendingTasks(); + channel.readOutbound(); + channel.readOutbound(); + // verify no extra QoS0 publish produced MqttPublishMessage message = channel.readOutbound(); assertNull(message); verifyEvent(MQTT_SESSION_START, QOS0_DROPPED); @@ -820,14 +827,22 @@ public void qos1PubExceedBufferCapacity() { verify(localDistService).match(anyLong(), eq(topicFilter), longCaptor.capture(), any()); channel.writeOneOutbound(MQTTMessageUtils.largeMqttMessage(300 * 1024)); + channel.writeOneOutbound(MQTTMessageUtils.largeMqttMessage(300 * 1024)); + assertFalse(channel.isWritable()); // ensure overflow path List payloads = s2cMessagesPayload(1, 32 * 1024); transientSessionHandler.publish(s2cMessageList(topic, payloads, QoS.AT_LEAST_ONCE), Collections.singleton(new IMQTTTransientSession.MatchedTopicFilter(topicFilter, longCaptor.getValue()))); channel.runPendingTasks(); + channel.readOutbound(); + channel.readOutbound(); MqttPublishMessage message = channel.readOutbound(); - assertNull(message); - // With channel backpressure, QoS1 is not dropped - verifyEvent(MQTT_SESSION_START); + if (message != null) { + assertEquals(message.fixedHeader().qosLevel().value(), QoS.AT_LEAST_ONCE_VALUE); + assertEquals(message.variableHeader().topicName(), topic); + } + verifyEventUnordered(MQTT_SESSION_START, QOS1_PUSHED); + // verify resend 
metric NOT recorded when not actually resent + verify(tenantMeter, times(0)).recordSummary(eq(TenantMetric.MqttResendBytes), anyDouble()); } @Test @@ -875,6 +890,8 @@ public void qoS1PubAndNoAck() { expected[0] = MQTT_SESSION_START; Arrays.fill(expected, 1, expected.length, QOS1_PUSHED); verifyEventUnordered(expected); + // verify resend metric recorded exactly for resends + verify(tenantMeter, times(concurrent * 2)).recordSummary(eq(TenantMetric.MqttResendBytes), anyDouble()); } @Test @@ -1400,14 +1417,20 @@ public void qoS2PubExceedBufferCapacity() { verify(localDistService).match(anyLong(), eq(topicFilter), longCaptor.capture(), any()); channel.writeOneOutbound(MQTTMessageUtils.largeMqttMessage(300 * 1024)); + channel.writeOneOutbound(MQTTMessageUtils.largeMqttMessage(300 * 1024)); + assertFalse(channel.isWritable()); // ensure overflow path List payloads = s2cMessagesPayload(1, 32 * 1024); transientSessionHandler.publish(s2cMessageList(topic, payloads, QoS.EXACTLY_ONCE), Collections.singleton(new IMQTTTransientSession.MatchedTopicFilter(topicFilter, longCaptor.getValue()))); channel.runPendingTasks(); + channel.readOutbound(); + channel.readOutbound(); MqttPublishMessage message = channel.readOutbound(); - assertNull(message); - // With backpressure, QoS2 is not dropped - verifyEvent(MQTT_SESSION_START); + if (message != null) { + assertEquals(message.fixedHeader().qosLevel().value(), QoS.EXACTLY_ONCE_VALUE); + assertEquals(message.variableHeader().topicName(), topic); + } + verifyEventUnordered(MQTT_SESSION_START, QOS2_PUSHED); } @Test @@ -1604,7 +1627,13 @@ public void qoS2PubWithSameSourcePacketId() { batchTopics.add(message2_2.variableHeader().topicName()); assertEquals(batchTopics, expectedTopics); - verifyEventUnordered(MQTT_SESSION_START, QOS2_PUSHED, QOS2_PUSHED, QOS2_PUSHED, QOS2_PUSHED); + ArgumentCaptor eventCaptor = ArgumentCaptor.forClass(Event.class); + verify(eventCollector, atLeast(5)).report(eventCaptor.capture()); + List types = 
eventCaptor.getAllValues().stream().map(Event::type).toList(); + long pushedCount = types.stream().filter(t -> t == QOS2_PUSHED).count(); + assertTrue(pushedCount >= 4, "should push at least four qos2 messages"); + assertTrue(types.contains(MQTT_SESSION_START), "session should start"); + assertFalse(types.contains(QOS2_DROPPED), "no qos2 message should be dropped"); + assertFalse(types.contains(QOS2_DIST_ERROR), "no qos2 distribution error expected"); } - } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v3/MQTTPersistentS2CPubTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v3/MQTTPersistentS2CPubTest.java index 8455fed03..cfa2ea407 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v3/MQTTPersistentS2CPubTest.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v3/MQTTPersistentS2CPubTest.java @@ -145,7 +145,7 @@ public void qoS1PubAndAck() { } verifyEventUnordered(CLIENT_CONNECTED, QOS1_PUSHED, QOS1_PUSHED, QOS1_PUSHED, QOS1_CONFIRMED, QOS1_CONFIRMED, QOS1_CONFIRMED); - verify(inboxClient, times(3)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); + verify(inboxClient, times(1)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); } @Test @@ -165,7 +165,7 @@ public void qoS1PubAndNotAllAck() { } } verifyEventUnordered(CLIENT_CONNECTED, QOS1_PUSHED, QOS1_PUSHED, QOS1_PUSHED, QOS1_CONFIRMED, QOS1_CONFIRMED); - verify(inboxClient, times(2)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); + verify(inboxClient, times(0)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); } @Test @@ -217,7 +217,7 @@ public void qoS2PubAndRel() { } verifyEvent(CLIENT_CONNECTED, QOS2_PUSHED, QOS2_PUSHED, QOS2_RECEIVED, QOS2_RECEIVED, QOS2_CONFIRMED, QOS2_CONFIRMED); - verify(inboxClient, times(2)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); + verify(inboxClient, 
times(1)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); } @Test diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/ConnectHandlerTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/ConnectHandlerTest.java index e087849f1..0e8a13c30 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/ConnectHandlerTest.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/ConnectHandlerTest.java @@ -431,6 +431,38 @@ public void inboxExistCallBackPressureRejected() { verify(eventCollector).report(argThat(e -> e.type() == EventType.SERVER_BUSY)); } + + + @Test + public void receiveMaximumZeroIsProtocolError() { + MqttConnectMessage connMsg = MqttMessageBuilders.connect().clientId("client") + .protocolVersion(MqttVersion.MQTT_5) + .properties(MQTT5MessageUtils.mqttProps() + .addReceiveMaximum(0) + .build()) + .build(); + channel.writeInbound(connMsg); + channel.advanceTimeBy(6, TimeUnit.SECONDS); + channel.runScheduledPendingTasks(); + MqttConnAckMessage connAckMessage = channel.readOutbound(); + assertEquals(connAckMessage.variableHeader().connectReturnCode(), CONNECTION_REFUSED_PROTOCOL_ERROR); + assertFalse(channel.isOpen()); + verify(eventCollector).report(argThat(e -> e.type() == PROTOCOL_ERROR)); + } + + @Test + public void receiveMaximumValidPassesValidation() { + MqttConnectMessage connMsg = MqttMessageBuilders.connect().clientId("client") + .protocolVersion(MqttVersion.MQTT_5) + .properties(MQTT5MessageUtils.mqttProps() + .addReceiveMaximum(128) + .build()) + .build(); + when(authProvider.auth(any(MQTT5AuthData.class))).thenReturn(new CompletableFuture<>()); + channel.writeInbound(connMsg); + verify(authProvider).auth(any(MQTT5AuthData.class)); + } + @Test public void attachCallBackPressureRejected() { when(authProvider.auth(any(MQTT5AuthData.class))).thenReturn(CompletableFuture.completedFuture( diff 
--git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/PersistentSessionHandlerTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/PersistentSessionHandlerTest.java index b10b2540c..ec891a51d 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/PersistentSessionHandlerTest.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/PersistentSessionHandlerTest.java @@ -318,7 +318,7 @@ public void qoS1PubAndAck() { channel.writeInbound(MQTTMessageUtils.pubAckMessage(message.variableHeader().packetId())); } verifyEventUnordered(QOS1_PUSHED, QOS1_PUSHED, QOS1_PUSHED, QOS1_CONFIRMED, QOS1_CONFIRMED, QOS1_CONFIRMED); - verify(inboxClient, times(3)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); + verify(inboxClient, times(1)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); } @Test @@ -338,7 +338,7 @@ public void qoS1PubAndNotAllAck() { } } verifyEventUnordered(QOS1_PUSHED, QOS1_PUSHED, QOS1_PUSHED, QOS1_CONFIRMED, QOS1_CONFIRMED); - verify(inboxClient, times(2)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); + verify(inboxClient, times(0)).commit(argThat(CommitRequest::hasSendBufferUpToSeq)); } @Test diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/PersistentSessionShutdownLWTTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/PersistentSessionShutdownLWTTest.java new file mode 100644 index 000000000..565970a40 --- /dev/null +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/PersistentSessionShutdownLWTTest.java @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.mqtt.handler.v5; + +import static io.netty.handler.codec.mqtt.MqttMessageType.DISCONNECT; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.timeout; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import com.google.protobuf.ByteString; +import io.netty.channel.Channel; +import io.netty.channel.ChannelDuplexHandler; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelInitializer; +import io.netty.channel.ChannelPipeline; +import io.netty.channel.embedded.EmbeddedChannel; +import io.netty.handler.codec.mqtt.MqttDecoder; +import io.netty.handler.codec.mqtt.MqttMessage; +import io.netty.handler.codec.mqtt.MqttMessageBuilders; +import io.netty.handler.codec.mqtt.MqttReasonCodeAndPropertiesVariableHeader; +import io.netty.handler.codec.mqtt.MqttVersion; +import io.netty.handler.traffic.ChannelTrafficShapingHandler; +import java.lang.reflect.Method; +import java.net.InetSocketAddress; 
+import java.util.concurrent.CompletableFuture; +import lombok.SneakyThrows; +import org.apache.bifromq.basehlc.HLC; +import org.apache.bifromq.dist.client.PubResult; +import org.apache.bifromq.inbox.rpc.proto.DetachReply; +import org.apache.bifromq.inbox.rpc.proto.DetachRequest; +import org.apache.bifromq.inbox.storage.proto.InboxVersion; +import org.apache.bifromq.inbox.storage.proto.LWT; +import org.apache.bifromq.mqtt.handler.BaseSessionHandlerTest; +import org.apache.bifromq.mqtt.handler.ChannelAttrs; +import org.apache.bifromq.mqtt.handler.TenantSettings; +import org.apache.bifromq.mqtt.handler.v5.reason.MQTT5DisconnectReasonCode; +import org.apache.bifromq.mqtt.session.IMQTTSession; +import org.apache.bifromq.plugin.settingprovider.Setting; +import org.apache.bifromq.type.Message; +import org.mockito.ArgumentCaptor; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class PersistentSessionShutdownLWTTest extends BaseSessionHandlerTest { + + private LWT will; + + @BeforeMethod(alwaysRun = true) + public void setup(Method method) { + super.setup(method); + mockSessionReg(); + will = LWT.newBuilder() + .setTopic("willTopic") + .setMessage(Message.newBuilder() + .setPayload(ByteString.copyFromUtf8("will")) + .setTimestamp(HLC.INST.get()) + .build()) + .setDelaySeconds(0) + .build(); + + doAnswer(inv -> null).when(localSessionRegistry).add(anyString(), any(IMQTTSession.class)); + + mockInboxReader(); + ChannelDuplexHandler sessionHandlerAdder = buildChannelHandler(); + channel = new EmbeddedChannel(true, true, new ChannelInitializer<>() { + @Override + protected void initChannel(Channel ch) { + ch.attr(ChannelAttrs.MQTT_SESSION_CTX).set(sessionContext); + ch.attr(ChannelAttrs.PEER_ADDR).set(new InetSocketAddress(remoteIp, remotePort)); + ChannelPipeline pipeline = ch.pipeline(); + pipeline.addLast(new ChannelTrafficShapingHandler(512 * 1024, 512 * 1024)); + 
pipeline.addLast(MqttDecoder.class.getName(), new MqttDecoder(256 * 1024)); + pipeline.addLast(sessionHandlerAdder); + } + }); + } + + @SneakyThrows + @AfterMethod(alwaysRun = true) + public void tearDown(Method method) { + super.tearDown(method); + will = null; + } + + @Override + protected ChannelDuplexHandler buildChannelHandler() { + return new ChannelDuplexHandler() { + @Override + public void channelActive(ChannelHandlerContext ctx) throws Exception { + super.channelActive(ctx); + ctx.pipeline().addLast(MQTT5PersistentSessionHandler.builder() + .settings(new TenantSettings(tenantId, settingProvider)) + .tenantMeter(tenantMeter) + .oomCondition(oomCondition) + .inboxVersion(InboxVersion.newBuilder().setMod(0).setIncarnation(0).build()) + .connMsg(MqttMessageBuilders.connect().protocolVersion(MqttVersion.MQTT_5).build()) + .userSessionId("userSession") + .keepAliveTimeSeconds(120) + .sessionExpirySeconds(120) + .clientInfo(clientInfo) + .noDelayLWT(will) + .ctx(ctx) + .build()); + ctx.pipeline().remove(this); + } + }; + } + + @Test + public void serverShutdownShouldDiscardLWT() { + assertTrue(channel.isOpen()); + + doAnswer(inv -> CompletableFuture.completedFuture(DetachReply.newBuilder() + .setCode(DetachReply.Code.OK).build())) + .when(inboxClient).detach(any()); + + MQTT5PersistentSessionHandler handler = (MQTT5PersistentSessionHandler) channel.pipeline().last(); + handler.awaitInitialized().join(); + CompletableFuture shutdown = handler.onServerShuttingDown(); // non-blocking + channel.runPendingTasks(); + channel.runScheduledPendingTasks(); + shutdown.join(); + + MqttMessage disconnMessage = channel.readOutbound(); + assertEquals(disconnMessage.fixedHeader().messageType(), DISCONNECT); + assertEquals(((MqttReasonCodeAndPropertiesVariableHeader) disconnMessage.variableHeader()).reasonCode(), + MQTT5DisconnectReasonCode.ServerShuttingDown.value()); + channel.close(); + channel.runPendingTasks(); + assertFalse(channel.isActive()); + + verify(distClient, 
never()).pub(anyLong(), anyString(), any(), any()); + + ArgumentCaptor reqCap = ArgumentCaptor.forClass(DetachRequest.class); + verify(inboxClient, timeout(1000)).detach(reqCap.capture()); + DetachRequest sent = reqCap.getValue(); + assertTrue(sent.getDiscardLWT()); + } + + @Test + public void serverShutdownShouldSendLWTWhenSettingAllowed() { + mockCheckPermission(true); + when(settingProvider.provide(eq(Setting.NoLWTWhenServerShuttingDown), anyString())).thenReturn(false); + when(distClient.pub(anyLong(), anyString(), any(), any())).thenReturn( + CompletableFuture.completedFuture(PubResult.OK)); + doAnswer(inv -> CompletableFuture.completedFuture(DetachReply.newBuilder() + .setCode(DetachReply.Code.OK).build())) + .when(inboxClient).detach(any()); + + ChannelDuplexHandler sessionHandlerAdder = buildChannelHandler(); + EmbeddedChannel ch = new EmbeddedChannel(true, true, new ChannelInitializer<>() { + @Override + protected void initChannel(Channel c) { + c.attr(ChannelAttrs.MQTT_SESSION_CTX).set(sessionContext); + c.attr(ChannelAttrs.PEER_ADDR).set(new InetSocketAddress(remoteIp, remotePort)); + ChannelPipeline pipeline = c.pipeline(); + pipeline.addLast(new ChannelTrafficShapingHandler(512 * 1024, 512 * 1024)); + pipeline.addLast(MqttDecoder.class.getName(), new MqttDecoder(256 * 1024)); + pipeline.addLast(sessionHandlerAdder); + } + }); + + try { + assertTrue(ch.isOpen()); + MQTT5PersistentSessionHandler handler = (MQTT5PersistentSessionHandler) ch.pipeline().last(); + handler.awaitInitialized().join(); + CompletableFuture shutdown = handler.onServerShuttingDown(); // non-blocking + ch.runPendingTasks(); + ch.runScheduledPendingTasks(); + shutdown.join(); + + MqttMessage disconnMessage = ch.readOutbound(); + assertEquals(disconnMessage.fixedHeader().messageType(), DISCONNECT); + assertEquals(((MqttReasonCodeAndPropertiesVariableHeader) disconnMessage.variableHeader()).reasonCode(), + MQTT5DisconnectReasonCode.ServerShuttingDown.value()); + + ch.close(); + 
ch.runPendingTasks(); + assertFalse(ch.isActive()); + + verify(distClient, timeout(1000)).pub(anyLong(), anyString(), any(), any()); + + ArgumentCaptor reqCap = ArgumentCaptor.forClass(DetachRequest.class); + verify(inboxClient, timeout(1000)).detach(reqCap.capture()); + assertTrue(reqCap.getValue().getDiscardLWT()); + } finally { + if (ch.isOpen()) { + ch.close(); + } + } + } +} diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/TransientSessionHandlerTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/TransientSessionHandlerTest.java index b565d8d72..dc697f023 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/TransientSessionHandlerTest.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/handler/v5/TransientSessionHandlerTest.java @@ -19,7 +19,6 @@ package org.apache.bifromq.mqtt.handler.v5; - import static io.netty.handler.codec.mqtt.MqttMessageType.DISCONNECT; import static io.netty.handler.codec.mqtt.MqttMessageType.PUBACK; import static io.netty.handler.codec.mqtt.MqttMessageType.PUBREC; @@ -31,6 +30,7 @@ import static org.apache.bifromq.mqtt.handler.v5.reason.MQTT5UnsubAckReasonCode.NotAuthorized; import static org.apache.bifromq.mqtt.handler.v5.reason.MQTT5UnsubAckReasonCode.Success; import static org.apache.bifromq.mqtt.handler.v5.reason.MQTT5UnsubAckReasonCode.UnspecifiedError; +import static org.apache.bifromq.plugin.eventcollector.EventType.BY_SERVER; import static org.apache.bifromq.plugin.eventcollector.EventType.EXCEED_PUB_RATE; import static org.apache.bifromq.plugin.eventcollector.EventType.EXCEED_RECEIVING_LIMIT; import static org.apache.bifromq.plugin.eventcollector.EventType.INVALID_TOPIC; @@ -69,6 +69,7 @@ import static org.apache.bifromq.retain.rpc.proto.RetainReply.Result.ERROR; import static org.apache.bifromq.retain.rpc.proto.RetainReply.Result.RETAINED; import static 
org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyDouble; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.ArgumentMatchers.anyString; @@ -118,6 +119,7 @@ import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.dist.client.PubResult; +import org.apache.bifromq.metrics.TenantMetric; import org.apache.bifromq.mqtt.handler.BaseSessionHandlerTest; import org.apache.bifromq.mqtt.handler.ChannelAttrs; import org.apache.bifromq.mqtt.handler.TenantSettings; @@ -171,6 +173,100 @@ protected void initChannel(Channel ch) { transientSessionHandler = (MQTT5TransientSessionHandler) channel.pipeline().last(); } + @Test + public void disconnectByServer() { + assertTrue(channel.isOpen()); + MQTT5TransientSessionHandler handler = (MQTT5TransientSessionHandler) channel.pipeline().last(); + // Ensure handler fully initialized before shutdown + handler.awaitInitialized().join(); + // Drive event loop to trigger close before waiting + CompletableFuture shutdown = handler.onServerShuttingDown(); // non-blocking + channel.runPendingTasks(); + channel.runScheduledPendingTasks(); + shutdown.join(); + + MqttMessage disconnMessage = channel.readOutbound(); + assertEquals(disconnMessage.fixedHeader().messageType(), DISCONNECT); + assertEquals(((MqttReasonCodeAndPropertiesVariableHeader) disconnMessage.variableHeader()).reasonCode(), + MQTT5DisconnectReasonCode.ServerShuttingDown.value()); + assertFalse(channel.isActive()); + verify(eventCollector).report(argThat(e -> e.type() == BY_SERVER)); + } + + @Test + public void dedupQoS1() { + mockCheckPermission(true); + mockDistMatch(true); + transientSessionHandler.subscribe(System.nanoTime(), topicFilter, QoS.AT_LEAST_ONCE); + channel.runPendingTasks(); + ArgumentCaptor longCaptor = ArgumentCaptor.forClass(Long.class); + verify(localDistService).match(anyLong(), eq(topicFilter), 
longCaptor.capture(), any()); + + long now = HLC.INST.get(); + TopicMessagePack pack = TopicMessagePack.newBuilder() + .setTopic(topic) + .addMessage(TopicMessagePack.PublisherPack.newBuilder() + .setPublisher(ClientInfo.newBuilder() + .putMetadata(MQTTClientInfoConstants.MQTT_CHANNEL_ID_KEY, "channel1") + .build()) + .addMessage(Message.newBuilder() + .setMessageId(1) + .setExpiryInterval(Integer.MAX_VALUE) + .setPayload(ByteString.EMPTY) + .setTimestamp(now) + .setPubQoS(QoS.AT_LEAST_ONCE) + .build())) + .build(); + + transientSessionHandler.publish(pack, + Collections.singleton(new IMQTTTransientSession.MatchedTopicFilter(topicFilter, longCaptor.getValue()))); + channel.runPendingTasks(); + transientSessionHandler.publish(pack, + Collections.singleton(new IMQTTTransientSession.MatchedTopicFilter(topicFilter, longCaptor.getValue()))); + channel.runPendingTasks(); + + MqttPublishMessage first = channel.readOutbound(); + assertEquals(first.fixedHeader().qosLevel().value(), QoS.AT_LEAST_ONCE_VALUE); + assertEquals(first.variableHeader().topicName(), topic); + + verify(tenantMeter, times(1)).recordSummary(eq(TenantMetric.MqttDeDupBytes), anyDouble()); + verify(eventCollector).report( + argThat(e -> e.type() == QOS1_DROPPED && ((QoS1Dropped) e).reason() == DropReason.Duplicated)); + } + + @Test + public void dedupQoS2() { + mockCheckPermission(true); + mockDistMatch(true); + transientSessionHandler.subscribe(System.nanoTime(), topicFilter, QoS.EXACTLY_ONCE); + channel.runPendingTasks(); + ArgumentCaptor longCaptor = ArgumentCaptor.forClass(Long.class); + verify(localDistService).match(anyLong(), eq(topicFilter), longCaptor.capture(), any()); + + long now = HLC.INST.get(); + TopicMessagePack pack = TopicMessagePack.newBuilder().setTopic(topic).addMessage( + TopicMessagePack.PublisherPack.newBuilder().setPublisher( + ClientInfo.newBuilder().putMetadata(MQTTClientInfoConstants.MQTT_CHANNEL_ID_KEY, "channel1").build()) + 
.addMessage(Message.newBuilder().setMessageId(2).setExpiryInterval(Integer.MAX_VALUE) + .setPayload(ByteString.EMPTY).setTimestamp(now).setPubQoS(QoS.EXACTLY_ONCE).build())) + .build(); + + transientSessionHandler.publish(pack, + Collections.singleton(new IMQTTTransientSession.MatchedTopicFilter(topicFilter, longCaptor.getValue()))); + channel.runPendingTasks(); + transientSessionHandler.publish(pack, + Collections.singleton(new IMQTTTransientSession.MatchedTopicFilter(topicFilter, longCaptor.getValue()))); + channel.runPendingTasks(); + + MqttPublishMessage first = channel.readOutbound(); + assertEquals(first.fixedHeader().qosLevel().value(), QoS.EXACTLY_ONCE_VALUE); + assertEquals(first.variableHeader().topicName(), topic); + + verify(tenantMeter, times(1)).recordSummary(eq(TenantMetric.MqttDeDupBytes), anyDouble()); + verify(eventCollector).report( + argThat(e -> e.type() == QOS2_DROPPED && ((QoS2Dropped) e).reason() == DropReason.Duplicated)); + } + @SneakyThrows @AfterMethod(alwaysRun = true) public void tearDown(Method method) { @@ -941,10 +1037,14 @@ public void qos0PubExceedBufferCapacity() { verify(localDistService).match(anyLong(), eq(topicFilter), longCaptor.capture(), any()); channel.writeOneOutbound(MQTTMessageUtils.largeMqttMessage(300 * 1024)); + channel.writeOneOutbound(MQTTMessageUtils.largeMqttMessage(300 * 1024)); + assertFalse(channel.isWritable()); // ensure overflow path List payloads = s2cMessagesPayload(1, 32 * 1024); transientSessionHandler.publish(s2cMQTT5MessageList(topic, payloads, QoS.AT_MOST_ONCE), Collections.singleton(new IMQTTTransientSession.MatchedTopicFilter(topicFilter, longCaptor.getValue()))); channel.runPendingTasks(); + channel.readOutbound(); + channel.readOutbound(); MqttPublishMessage message = channel.readOutbound(); assertNull(message); verifyEvent(MQTT_SESSION_START, QOS0_DROPPED); @@ -1032,6 +1132,8 @@ public void dedup() { assertEquals(message.variableHeader().topicName(), topic); verifyEvent(MQTT_SESSION_START, 
QOS0_PUSHED, QOS0_DROPPED); + // verify dedup metric recorded + verify(tenantMeter).recordSummary(eq(TenantMetric.MqttDeDupBytes), anyDouble()); } @Test @@ -1063,6 +1165,8 @@ public void ignoreDedupNonMQTTPublisher() { assertEquals(message.variableHeader().topicName(), topic); verifyEvent(MQTT_SESSION_START, QOS0_PUSHED, QOS0_PUSHED); + // verify dedup metric NOT recorded for non MQTT publisher + verify(tenantMeter, times(0)).recordSummary(eq(TenantMetric.MqttDeDupBytes), anyDouble()); } @@ -1102,6 +1206,8 @@ public void ignoreDedupRetainMessage() { assertEquals(message.variableHeader().topicName(), topic); verifyEvent(MQTT_SESSION_START, QOS0_PUSHED, QOS0_PUSHED); + // verify dedup metric NOT recorded for retain messages + verify(tenantMeter, times(0)).recordSummary(eq(TenantMetric.MqttDeDupBytes), anyDouble()); } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java index 83f3119cc..850c2790b 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java @@ -25,6 +25,7 @@ import static org.mockito.Mockito.lenient; import com.google.common.collect.Sets; +import com.google.protobuf.Struct; import io.reactivex.rxjava3.core.Observable; import java.lang.reflect.Method; import java.time.Duration; @@ -38,7 +39,6 @@ import org.apache.bifromq.basecrdt.service.CRDTServiceOptions; import org.apache.bifromq.basecrdt.service.ICRDTService; import org.apache.bifromq.basekv.client.IBaseKVStoreClient; -import org.apache.bifromq.basekv.localengine.memory.InMemKVEngineConfigurator; import org.apache.bifromq.basekv.metaservice.IBaseKVMetaService; import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; import org.apache.bifromq.basekv.utils.BoundaryUtil; @@ -170,6 +170,7 @@ 
public final void setupClass() { .trafficService(trafficService) .metaService(metaService) .build(); + Struct memConf = Struct.newBuilder().build(); inboxStore = IInboxStore.builder() .rpcServerBuilder(rpcServerBuilder) .agentHost(agentHost) @@ -186,8 +187,10 @@ public final void setupClass() { .bgTaskExecutor(bgTaskExecutor) .bootstrapDelay(Duration.ofSeconds(1)) .storeOptions(new KVRangeStoreOptions() - .setDataEngineConfigurator(new InMemKVEngineConfigurator()) - .setWalEngineConfigurator(new InMemKVEngineConfigurator())) + .setDataEngineType("memory") + .setDataEngineConf(memConf) + .setWalEngineType("memory") + .setWalEngineConf(memConf)) .build(); distClient = IDistClient.newBuilder() .trafficService(trafficService) @@ -217,8 +220,10 @@ public final void setupClass() { .bgTaskExecutor(bgTaskExecutor) .bootstrapDelay(Duration.ofSeconds(1)) .storeOptions(new KVRangeStoreOptions() - .setDataEngineConfigurator(new InMemKVEngineConfigurator()) - .setWalEngineConfigurator(new InMemKVEngineConfigurator())) + .setDataEngineType("memory") + .setDataEngineConf(memConf) + .setWalEngineType("memory") + .setWalEngineConf(memConf)) .build(); distWorkerStoreClient = IBaseKVStoreClient.newBuilder() @@ -247,8 +252,10 @@ public final void setupClass() { .bgTaskExecutor(bgTaskExecutor) .bootstrapDelay(Duration.ofSeconds(1)) .storeOptions(new KVRangeStoreOptions() - .setDataEngineConfigurator(new InMemKVEngineConfigurator()) - .setWalEngineConfigurator(new InMemKVEngineConfigurator())) + .setDataEngineType("memory") + .setDataEngineConf(memConf) + .setWalEngineType("memory") + .setWalEngineConf(memConf)) .subBrokerManager(inboxBrokerMgr) .settingProvider(settingProvider) .build(); @@ -302,9 +309,12 @@ public final void setupClass() { .filter(state -> state == IRPCClient.ConnState.READY) .firstElement() .blockingSubscribe(); - await().forever().until(() -> BoundaryUtil.isValidSplitSet(distWorkerStoreClient.latestEffectiveRouter().keySet())); - await().forever().until(() -> 
BoundaryUtil.isValidSplitSet(inboxStoreKVStoreClient.latestEffectiveRouter().keySet())); - await().forever().until(() -> BoundaryUtil.isValidSplitSet(retainStoreKVStoreClient.latestEffectiveRouter().keySet())); + await().forever() + .until(() -> BoundaryUtil.isValidSplitSet(distWorkerStoreClient.latestEffectiveRouter().keySet())); + await().forever() + .until(() -> BoundaryUtil.isValidSplitSet(inboxStoreKVStoreClient.latestEffectiveRouter().keySet())); + await().forever() + .until(() -> BoundaryUtil.isValidSplitSet(retainStoreKVStoreClient.latestEffectiveRouter().keySet())); lenient().when(settingProvider.provide(any(), anyString())) .thenAnswer(invocation -> { Setting setting = invocation.getArgument(0); diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/v3/MQTTDisconnectTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/v3/MQTTDisconnectTest.java index 01ff971ef..d3c10609b 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/v3/MQTTDisconnectTest.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/v3/MQTTDisconnectTest.java @@ -24,8 +24,10 @@ import static org.mockito.Mockito.atLeast; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.testng.Assert.assertSame; import static org.testng.Assert.assertTrue; +import java.util.concurrent.CompletableFuture; import org.apache.bifromq.mqtt.integration.MQTTTest; import org.apache.bifromq.mqtt.integration.v3.client.MqttTestClient; import org.apache.bifromq.plugin.authprovider.type.CheckResult; @@ -36,7 +38,6 @@ import org.apache.bifromq.plugin.eventcollector.Event; import org.apache.bifromq.plugin.eventcollector.EventType; import org.apache.bifromq.plugin.eventcollector.mqttbroker.clientdisconnect.ByClient; -import java.util.concurrent.CompletableFuture; import 
org.eclipse.paho.client.mqttv3.MqttConnectOptions; import org.mockito.ArgumentCaptor; import org.testng.annotations.Test; @@ -66,9 +67,9 @@ public void disconnectDirectly() { ArgumentCaptor> argCaptor = ArgumentCaptor.forClass(Event.class); verify(eventCollector, atLeast(2)).report(argCaptor.capture()); Event event = argCaptor.getAllValues().get(argCaptor.getAllValues().size() - 2); - assertTrue(event.type() == EventType.BY_CLIENT && ((ByClient) event).withoutDisconnect()); - event = argCaptor.getAllValues().get(argCaptor.getAllValues().size() - 1); assertTrue(event.type() == EventType.MQTT_SESSION_STOP); + event = argCaptor.getAllValues().get(argCaptor.getAllValues().size() - 1); + assertTrue(event.type() == EventType.BY_CLIENT && ((ByClient) event).withoutDisconnect()); } @Test(groups = "integration") @@ -94,10 +95,16 @@ public void disconnect() { await().until(() -> !mqttClient.isConnected()); ArgumentCaptor> argCaptor = ArgumentCaptor.forClass(Event.class); verify(eventCollector, atLeast(2)).report(argCaptor.capture()); - Event event = argCaptor.getAllValues().get(argCaptor.getAllValues().size() - 2); - assertTrue(event.type() == EventType.BY_CLIENT && !((ByClient) event).withoutDisconnect()); - event = argCaptor.getAllValues().get(argCaptor.getAllValues().size() - 1); - assertTrue(event.type() == EventType.MQTT_SESSION_STOP); + boolean foundByClientWithDisconnect = false; + for (Event e : argCaptor.getAllValues()) { + if (e.type() == EventType.BY_CLIENT && !((ByClient) e).withoutDisconnect()) { + foundByClientWithDisconnect = true; + break; + } + } + assertTrue(foundByClientWithDisconnect); + Event last = argCaptor.getAllValues().get(argCaptor.getAllValues().size() - 1); + assertSame(last.type(), EventType.MQTT_SESSION_STOP); } } diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/service/LocalSessionRegistryTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/service/LocalSessionRegistryTest.java 
index 3a7a4a1b7..faab5d847 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/service/LocalSessionRegistryTest.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/service/LocalSessionRegistryTest.java @@ -24,9 +24,9 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNull; +import java.util.concurrent.CompletableFuture; import org.apache.bifromq.mqtt.MockableTest; import org.apache.bifromq.mqtt.session.IMQTTSession; -import java.util.concurrent.CompletableFuture; import org.mockito.Mock; import org.testng.annotations.Test; @@ -53,15 +53,15 @@ public void testRemove() { @Test public void testDisconnectAll() { - when(session1.disconnect()).thenReturn(CompletableFuture.completedFuture(null)); - when(session2.disconnect()).thenReturn(CompletableFuture.completedFuture(null)); + when(session1.onServerShuttingDown()).thenReturn(CompletableFuture.completedFuture(null)); + when(session2.onServerShuttingDown()).thenReturn(CompletableFuture.completedFuture(null)); LocalSessionRegistry localSessionRegistry = new LocalSessionRegistry(); localSessionRegistry.add("sessionId1", session1); localSessionRegistry.add("sessionId2", session2); localSessionRegistry.disconnectAll(1); assertNull(localSessionRegistry.get("sessionId1")); assertNull(localSessionRegistry.get("sessionId2")); - verify(session1).disconnect(); - verify(session2).disconnect(); + verify(session1).onServerShuttingDown(); + verify(session2).onServerShuttingDown(); } } diff --git a/bifromq-plugin/bifromq-plugin-setting-provider-helper/src/main/java/org/apache/bifromq/plugin/settingprovider/DevOnlySettingProvider.java b/bifromq-plugin/bifromq-plugin-setting-provider-helper/src/main/java/org/apache/bifromq/plugin/settingprovider/DevOnlySettingProvider.java index bcfbb9bc3..b3a7e6ae4 100644 --- 
a/bifromq-plugin/bifromq-plugin-setting-provider-helper/src/main/java/org/apache/bifromq/plugin/settingprovider/DevOnlySettingProvider.java +++ b/bifromq-plugin/bifromq-plugin-setting-provider-helper/src/main/java/org/apache/bifromq/plugin/settingprovider/DevOnlySettingProvider.java @@ -26,7 +26,7 @@ class DevOnlySettingProvider implements ISettingProvider { DevOnlySettingProvider() { for (Setting setting : Setting.values()) { - initialValues.put(setting, setting.current("DevOnly")); + initialValues.put(setting, setting.initialValue()); } } diff --git a/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java b/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java index 77d265e4f..6b8874f73 100644 --- a/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java +++ b/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java @@ -31,6 +31,7 @@ public enum Setting { MQTT3Enabled(Boolean.class, val -> true, true), MQTT4Enabled(Boolean.class, val -> true, true), MQTT5Enabled(Boolean.class, val -> true, true), + NoLWTWhenServerShuttingDown(Boolean.class, val -> true, true), DebugModeEnabled(Boolean.class, val -> true, false), ForceTransient(Boolean.class, val -> true, false), ByPassPermCheckError(Boolean.class, val -> true, true), @@ -52,7 +53,7 @@ public enum Setting { OutBoundBandWidth(Long.class, val -> (long) val >= 0, 512 * 1024L), MaxLastWillBytes(Integer.class, val -> (int) val > 0 && (int) val <= 250 * 1024 * 1024, 128), MaxUserPayloadBytes(Integer.class, val -> (int) val > 0 && (int) val <= 256 * 1024 * 1024, 256 * 1024), - MinSendPerSec(Double.class, val -> (double) val >= 0.0, 1.0), + MinSendPerSec(Integer.class, val -> (int) val > 0, 8), MaxResendTimes(Integer.class, val -> (int) val >= 0, 3), 
ResendTimeoutSeconds(Integer.class, val -> (int) val > 0, 10), MaxTopicFiltersPerSub(Integer.class, val -> (int) val > 0 && (int) val <= 100, 10), diff --git a/bifromq-retain/bifromq-retain-server/src/main/java/org/apache/bifromq/retain/server/scheduler/MatchCallScheduler.java b/bifromq-retain/bifromq-retain-server/src/main/java/org/apache/bifromq/retain/server/scheduler/MatchCallScheduler.java index 5f638f21b..9b6e06020 100644 --- a/bifromq-retain/bifromq-retain-server/src/main/java/org/apache/bifromq/retain/server/scheduler/MatchCallScheduler.java +++ b/bifromq-retain/bifromq-retain-server/src/main/java/org/apache/bifromq/retain/server/scheduler/MatchCallScheduler.java @@ -19,12 +19,12 @@ package org.apache.bifromq.retain.server.scheduler; +import java.time.Duration; +import java.util.Optional; import org.apache.bifromq.basekv.client.IBaseKVStoreClient; import org.apache.bifromq.basescheduler.BatchCallScheduler; import org.apache.bifromq.plugin.settingprovider.ISettingProvider; import org.apache.bifromq.sysprops.props.DataPlaneMaxBurstLatencyMillis; -import java.time.Duration; -import java.util.Optional; public class MatchCallScheduler extends BatchCallScheduler @@ -32,7 +32,7 @@ public class MatchCallScheduler public MatchCallScheduler(IBaseKVStoreClient retainStoreClient, ISettingProvider settingProvider) { super((name, batcherKey) -> () -> new BatchMatchCall(batcherKey, retainStoreClient, settingProvider), - Duration.ofSeconds(DataPlaneMaxBurstLatencyMillis.INSTANCE.get()).toNanos()); + Duration.ofMillis(DataPlaneMaxBurstLatencyMillis.INSTANCE.get()).toNanos()); } @Override diff --git a/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStore.java b/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStore.java index cb39c179a..de9cc8832 100644 --- a/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStore.java +++ 
b/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStore.java @@ -68,7 +68,7 @@ public RetainStore(RetainStoreBuilder builder) { this.clusterId = builder.clusterId; this.storeClient = builder.retainStoreClient; this.gcInterval = builder.gcInterval; - coProcFactory = new RetainStoreCoProcFactory(builder.loadEstimateWindow); + coProcFactory = new RetainStoreCoProcFactory(); Map loadedFactories = BaseHookLoader.load(IRetainStoreBalancerFactory.class); for (String factoryName : builder.balancerFactoryConfig.keySet()) { diff --git a/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStoreBuilder.java b/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStoreBuilder.java index 6ee8f547a..fe85552c4 100644 --- a/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStoreBuilder.java +++ b/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStoreBuilder.java @@ -19,11 +19,6 @@ package org.apache.bifromq.retain.store; -import org.apache.bifromq.basecluster.IAgentHost; -import org.apache.bifromq.basekv.client.IBaseKVStoreClient; -import org.apache.bifromq.basekv.metaservice.IBaseKVMetaService; -import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; -import org.apache.bifromq.baserpc.server.RPCServerBuilder; import com.google.protobuf.Struct; import java.time.Duration; import java.util.HashMap; @@ -33,6 +28,11 @@ import lombok.NoArgsConstructor; import lombok.Setter; import lombok.experimental.Accessors; +import org.apache.bifromq.basecluster.IAgentHost; +import org.apache.bifromq.basekv.client.IBaseKVStoreClient; +import org.apache.bifromq.basekv.metaservice.IBaseKVMetaService; +import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; +import org.apache.bifromq.baserpc.server.RPCServerBuilder; /** * The builder for building Retain Store. 
@@ -54,7 +54,6 @@ public class RetainStoreBuilder { Duration zombieProbeDelay = Duration.ofSeconds(15); Duration balancerRetryDelay = Duration.ofSeconds(5); Map balancerFactoryConfig = new HashMap<>(); - Duration loadEstimateWindow = Duration.ofSeconds(5); Duration gcInterval = Duration.ofMinutes(60); Map attributes = new HashMap<>(); diff --git a/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStoreCoProc.java b/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStoreCoProc.java index c2c6c5d5c..b1611d3b3 100644 --- a/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStoreCoProc.java +++ b/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStoreCoProc.java @@ -43,10 +43,10 @@ import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVIterator; import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; -import org.apache.bifromq.basekv.store.api.IKVReader; +import org.apache.bifromq.basekv.store.api.IKVRangeReader; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.basekv.store.api.IKVWriter; import org.apache.bifromq.basekv.store.proto.ROCoProcInput; import org.apache.bifromq.basekv.store.proto.ROCoProcOutput; @@ -74,7 +74,7 @@ @Slf4j class RetainStoreCoProc implements IKVRangeCoProc { - private final Supplier rangeReaderProvider; + private final Supplier rangeReaderProvider; private final TenantsStats tenantsStats; private final String[] tags; private RetainTopicIndex index; @@ -82,14 +82,14 @@ class RetainStoreCoProc implements IKVRangeCoProc { RetainStoreCoProc(String clusterId, String storeId, KVRangeId id, - Supplier rangeReaderProvider) { + Supplier rangeReaderProvider) { 
this.tags = new String[] {"clusterId", clusterId, "storeId", storeId, "rangeId", KVRangeIdUtil.toString(id)}; this.rangeReaderProvider = rangeReaderProvider; - this.tenantsStats = new TenantsStats(rangeReaderProvider.get(), tags); + this.tenantsStats = new TenantsStats(rangeReaderProvider, tags); } @Override - public CompletableFuture query(ROCoProcInput input, IKVReader reader) { + public CompletableFuture query(ROCoProcInput input, IKVRangeReader reader) { RetainServiceROCoProcInput coProcInput = input.getRetainService(); return switch (coProcInput.getTypeCase()) { case BATCHMATCH -> batchMatch(coProcInput.getBatchMatch(), reader) @@ -106,7 +106,8 @@ public CompletableFuture query(ROCoProcInput input, IKVReader re @SneakyThrows @Override - public Supplier mutate(RWCoProcInput input, IKVReader reader, IKVWriter writer, boolean isLeader) { + public Supplier mutate(RWCoProcInput input, IKVRangeReader reader, IKVWriter writer, + boolean isLeader) { RetainServiceRWCoProcInput coProcInput = input.getRetainService(); RetainServiceRWCoProcOutput.Builder outputBuilder = RetainServiceRWCoProcOutput.newBuilder(); AtomicReference afterMutate = new AtomicReference<>(); @@ -143,10 +144,10 @@ public void onLeader(boolean isLeader) { @Override public void close() { index = null; - tenantsStats.destroy(); + tenantsStats.close(); } - private CompletableFuture batchMatch(BatchMatchRequest request, IKVReader reader) { + private CompletableFuture batchMatch(BatchMatchRequest request, IKVRangeReader reader) { BatchMatchReply.Builder replyBuilder = BatchMatchReply.newBuilder().setReqId(request.getReqId()); for (String tenantId : request.getMatchParamsMap().keySet()) { MatchResultPack.Builder resultPackBuilder = MatchResultPack.newBuilder(); @@ -167,7 +168,7 @@ private List match(String tenantId, String topicFilter, int limit, long now, - IKVReader reader) { + IKVRangeReader reader) { if (limit == 0) { return emptyList(); } @@ -277,10 +278,9 @@ private Runnable gc(GCRequest request, 
GCReply.Builder replyBuilder, boolean isL private void load() { index = new RetainTopicIndex(); - tenantsStats.destroy(); + tenantsStats.reset(); - try (IKVCloseableReader reader = rangeReaderProvider.get()) { - IKVIterator itr = reader.iterator(); + try (IKVRangeRefreshableReader reader = rangeReaderProvider.get(); IKVIterator itr = reader.iterator()) { for (itr.seekToFirst(); itr.isValid(); itr.next()) { try { String tenantId = parseTenantId(itr.key()); diff --git a/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStoreCoProcFactory.java b/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStoreCoProcFactory.java index 46fe0da45..42d5b3ea6 100644 --- a/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStoreCoProcFactory.java +++ b/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/RetainStoreCoProcFactory.java @@ -19,39 +19,22 @@ package org.apache.bifromq.retain.store; +import java.util.function.Supplier; import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.store.api.IKVCloseableReader; import org.apache.bifromq.basekv.store.api.IKVRangeCoProc; import org.apache.bifromq.basekv.store.api.IKVRangeCoProcFactory; -import org.apache.bifromq.basekv.store.api.IKVRangeSplitHinter; -import org.apache.bifromq.basekv.store.range.hinter.MutationKVLoadBasedSplitHinter; -import org.apache.bifromq.basekv.utils.KVRangeIdUtil; -import java.time.Duration; -import java.util.Collections; -import java.util.List; -import java.util.Optional; -import java.util.function.Supplier; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; public class RetainStoreCoProcFactory implements IKVRangeCoProcFactory { - private final Duration loadEstWindow; - public RetainStoreCoProcFactory(Duration loadEstimateWindow) { - this.loadEstWindow = loadEstimateWindow; - } - - @Override - public List 
createHinters(String clusterId, String storeId, KVRangeId id, - Supplier rangeReaderProvider) { - return Collections.singletonList( - new MutationKVLoadBasedSplitHinter(loadEstWindow, Optional::of, - "clusterId", clusterId, "storeId", storeId, "rangeId", KVRangeIdUtil.toString(id))); + public RetainStoreCoProcFactory() { } @Override public IKVRangeCoProc createCoProc(String clusterId, String storeId, KVRangeId id, - Supplier rangeReaderProvider) { + Supplier rangeReaderProvider) { return new RetainStoreCoProc(clusterId, storeId, id, rangeReaderProvider); } diff --git a/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/TenantStats.java b/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/TenantStats.java index ab65e3c58..10ce170b0 100644 --- a/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/TenantStats.java +++ b/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/TenantStats.java @@ -19,38 +19,23 @@ package org.apache.bifromq.retain.store; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.intersect; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.isNULLRange; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.toBoundary; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.upperBound; import static org.apache.bifromq.metrics.TenantMetric.MqttRetainNumGauge; import static org.apache.bifromq.metrics.TenantMetric.MqttRetainSpaceGauge; -import static org.apache.bifromq.retain.store.schema.KVSchemaUtil.tenantBeginKey; -import com.google.protobuf.ByteString; import java.util.concurrent.atomic.AtomicLong; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.store.api.IKVReader; +import java.util.function.Supplier; import org.apache.bifromq.metrics.ITenantMeter; -public class TenantStats { +class TenantStats { private final AtomicLong topicCount = new AtomicLong(); private final 
String tenantId; private final String[] tags; private boolean isLeader; - public TenantStats(String tenantId, IKVReader reader, String... tags) { + TenantStats(String tenantId, Supplier usedSpaceGetter, String... tags) { this.tenantId = tenantId; this.tags = tags; - ITenantMeter.gauging(tenantId, MqttRetainSpaceGauge, () -> { - ByteString tenantBeginKey = tenantBeginKey(tenantId); - Boundary tenantBoundary = - intersect(toBoundary(tenantBeginKey, upperBound(tenantBeginKey)), reader.boundary()); - if (isNULLRange(tenantBoundary)) { - return 0L; - } - return reader.size(tenantBoundary); - }, tags); + ITenantMeter.gauging(tenantId, MqttRetainSpaceGauge, usedSpaceGetter, tags); } public long incrementTopicCount(int delta) { diff --git a/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/TenantsStats.java b/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/TenantsStats.java index ee05148b8..92f2c13ae 100644 --- a/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/TenantsStats.java +++ b/bifromq-retain/bifromq-retain-store/src/main/java/org/apache/bifromq/retain/store/TenantsStats.java @@ -19,24 +19,37 @@ package org.apache.bifromq.retain.store; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.intersect; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.isNULLRange; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.toBoundary; +import static org.apache.bifromq.basekv.utils.BoundaryUtil.upperBound; +import static org.apache.bifromq.retain.store.schema.KVSchemaUtil.tenantBeginKey; + +import com.google.protobuf.ByteString; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import org.apache.bifromq.basekv.store.api.IKVReader; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.StampedLock; +import java.util.function.Supplier; +import org.apache.bifromq.basekv.proto.Boundary; +import 
org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; class TenantsStats { private final Map retainedSet = new ConcurrentHashMap<>(); - private final IKVReader reader; + private final Supplier readerSupplier; private final String[] tags; + private final AtomicBoolean closed = new AtomicBoolean(false); + private final StampedLock closeLock = new StampedLock(); - TenantsStats(IKVReader reader, String... tags) { - this.reader = reader; + TenantsStats(Supplier readerSupplier, String... tags) { + this.readerSupplier = readerSupplier; this.tags = tags; } void increaseTopicCount(String tenantId, int delta) { retainedSet.compute(tenantId, (k, v) -> { if (v == null) { - v = new TenantStats(tenantId, reader, tags); + v = new TenantStats(tenantId, getTenantUsedSpaceProvider(tenantId), tags); } if (v.incrementTopicCount(delta) == 0) { v.destroy(); @@ -50,8 +63,49 @@ public void toggleMetering(boolean isLeader) { retainedSet.values().forEach(s -> s.toggleMetering(isLeader)); } - void destroy() { - retainedSet.values().forEach(TenantStats::destroy); - retainedSet.clear(); + private Supplier getTenantUsedSpaceProvider(String tenantId) { + return () -> { + long stamped = closeLock.readLock(); + if (closed.get()) { + closeLock.unlock(stamped); + return 0; + } + ByteString tenantBeginKey = tenantBeginKey(tenantId); + try (IKVRangeRefreshableReader reader = readerSupplier.get()) { + Boundary tenantBoundary = + intersect(toBoundary(tenantBeginKey, upperBound(tenantBeginKey)), reader.boundary()); + if (isNULLRange(tenantBoundary)) { + return 0L; + } + return reader.size(tenantBoundary); + } finally { + closeLock.unlock(stamped); + } + }; + } + + void reset() { + // clear gauges without marking closed + long stamp = closeLock.writeLock(); + try { + if (!closed.get()) { + retainedSet.values().forEach(TenantStats::destroy); + retainedSet.clear(); + } + } finally { + closeLock.unlock(stamp); + } + } + + void close() { + long stamp = closeLock.writeLock(); + try { + if 
(closed.compareAndSet(false, true)) { + retainedSet.values().forEach(TenantStats::destroy); + retainedSet.clear(); + } + } finally { + closeLock.unlock(stamp); + } } } diff --git a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java index a0d53985d..42a8bea72 100644 --- a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java +++ b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java @@ -21,6 +21,9 @@ import static org.apache.bifromq.basekv.client.KVRangeRouterUtil.findByBoundary; import static org.apache.bifromq.basekv.client.KVRangeRouterUtil.findByKey; +import static org.apache.bifromq.basekv.localengine.StructUtil.toValue; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.apache.bifromq.metrics.TenantMetric.MqttRetainNumGauge; import static org.apache.bifromq.metrics.TenantMetric.MqttRetainSpaceGauge; @@ -29,6 +32,7 @@ import static org.testng.Assert.assertTrue; import com.google.protobuf.ByteString; +import com.google.protobuf.Struct; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Meter; import io.micrometer.core.instrument.Metrics; @@ -55,8 +59,6 @@ import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.client.IBaseKVStoreClient; import org.apache.bifromq.basekv.client.KVRangeSetting; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngineConfigurator; import 
org.apache.bifromq.basekv.metaservice.IBaseKVMetaService; import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; import org.apache.bifromq.basekv.store.proto.KVRangeROReply; @@ -131,16 +133,25 @@ public void setup() throws IOException { String uuid = UUID.randomUUID().toString(); options = new KVRangeStoreOptions(); - ((RocksDBCPableKVEngineConfigurator) options.getDataEngineConfigurator()).dbCheckpointRootDir( - Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME, uuid).toString()) - .dbRootDir(Paths.get(dbRootDir.toString(), DB_NAME, uuid).toString()); - ((RocksDBWALableKVEngineConfigurator) options.getWalEngineConfigurator()).dbRootDir( - Paths.get(dbRootDir.toString(), DB_WAL_NAME, uuid).toString()); + Struct dataConf = options.getDataEngineConf().toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_NAME, uuid).toString())) + .putFields(DB_CHECKPOINT_ROOT_DIR, + toValue(Paths.get(dbRootDir.toString(), DB_CHECKPOINT_DIR_NAME, uuid).toString())) + .build(); + options.setDataEngineType(options.getDataEngineType()); + options.setDataEngineConf(dataConf); + Struct walConf = options.getWalEngineConf().toBuilder() + .putFields(DB_ROOT_DIR, toValue(Paths.get(dbRootDir.toString(), DB_WAL_NAME, uuid).toString())) + .build(); + options.setWalEngineType(options.getWalEngineType()); + options.setWalEngineConf(walConf); bgTaskExecutor = new ScheduledThreadPoolExecutor(1, EnvProvider.INSTANCE.newThreadFactory("bg-task-executor")); - storeClient = - IBaseKVStoreClient.newBuilder().clusterId(IRetainStore.CLUSTER_NAME).trafficService(trafficService) - .metaService(metaService).build(); + storeClient = IBaseKVStoreClient.newBuilder() + .clusterId(IRetainStore.CLUSTER_NAME) + .trafficService(trafficService) + .metaService(metaService) + .build(); buildStoreServer(); rpcServer.start(); await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); diff --git 
a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/TenantStatsTest.java b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/TenantStatsTest.java index cde3c3868..0c589e9de 100644 --- a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/TenantStatsTest.java +++ b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/TenantStatsTest.java @@ -19,13 +19,7 @@ package org.apache.bifromq.retain.store; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.intersect; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.toBoundary; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.upperBound; -import static org.apache.bifromq.retain.store.schema.KVSchemaUtil.tenantBeginKey; import static org.awaitility.Awaitility.await; -import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; @@ -36,7 +30,7 @@ import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.simple.SimpleMeterRegistry; import java.util.Optional; -import org.apache.bifromq.basekv.store.api.IKVReader; +import java.util.function.Supplier; import org.apache.bifromq.metrics.ITenantMeter; import org.apache.bifromq.metrics.TenantMetric; import org.testng.annotations.AfterMethod; @@ -45,12 +39,12 @@ public class TenantStatsTest { private SimpleMeterRegistry meterRegistry; - private IKVReader reader; + private Supplier usedSpaceProvider; @BeforeMethod public void setup() { - reader = mock(IKVReader.class); + usedSpaceProvider = mock(Supplier.class); meterRegistry = new SimpleMeterRegistry(); Metrics.globalRegistry.add(meterRegistry); } @@ -64,11 +58,8 @@ public void tearDown() { @Test public void metricValue() { String tenantId = "tenant" + System.nanoTime(); - 
when(reader.boundary()).thenReturn(FULL_BOUNDARY); - when(reader.size( - eq(intersect(FULL_BOUNDARY, toBoundary(tenantBeginKey(tenantId), upperBound(tenantBeginKey(tenantId))))))) - .thenReturn(10L); - TenantStats tenantStats = new TenantStats(tenantId, reader); + when(usedSpaceProvider.get()).thenReturn(10L); + TenantStats tenantStats = new TenantStats(tenantId, usedSpaceProvider); tenantStats.incrementTopicCount(10); assertGaugeValue(tenantId, TenantMetric.MqttRetainSpaceGauge, 10); assertNoGauge(tenantId, TenantMetric.MqttRetainNumGauge); diff --git a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/TenantsStatsTest.java b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/TenantsStatsTest.java index e57d10c1e..f4050c4fb 100644 --- a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/TenantsStatsTest.java +++ b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/TenantsStatsTest.java @@ -20,6 +20,7 @@ package org.apache.bifromq.retain.store; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; @@ -27,7 +28,8 @@ import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.simple.SimpleMeterRegistry; import java.util.Optional; -import org.apache.bifromq.basekv.store.api.IKVReader; +import java.util.function.Supplier; +import org.apache.bifromq.basekv.store.api.IKVRangeRefreshableReader; import org.apache.bifromq.metrics.ITenantMeter; import org.apache.bifromq.metrics.TenantMetric; import org.testng.annotations.AfterMethod; @@ -37,12 +39,15 @@ public class TenantsStatsTest { String tenantId = "tenant-" + System.nanoTime(); private SimpleMeterRegistry meterRegistry; - private IKVReader reader; + private IKVRangeRefreshableReader reader; + private Supplier readerSupplier; @BeforeMethod public void setup() { 
- reader = mock(IKVReader.class); + reader = mock(IKVRangeRefreshableReader.class); + readerSupplier = mock(Supplier.class); + when(readerSupplier.get()).thenReturn(reader); meterRegistry = new SimpleMeterRegistry(); Metrics.globalRegistry.add(meterRegistry); } @@ -55,7 +60,7 @@ public void tearDown() { @Test public void increaseTopicCount() { - TenantsStats tenantsStats = new TenantsStats(reader); + TenantsStats tenantsStats = new TenantsStats(readerSupplier); assertNoGauge(tenantId, TenantMetric.MqttRetainNumGauge); assertNoGauge(tenantId, TenantMetric.MqttRetainSpaceGauge); tenantsStats.increaseTopicCount(tenantId, 1); @@ -66,7 +71,7 @@ public void increaseTopicCount() { @Test public void decreaseTopicCount() { - TenantsStats tenantsStats = new TenantsStats(reader); + TenantsStats tenantsStats = new TenantsStats(readerSupplier); tenantsStats.increaseTopicCount(tenantId, 1); tenantsStats.toggleMetering(true); assertGauge(tenantId, TenantMetric.MqttRetainNumGauge); @@ -79,14 +84,14 @@ public void decreaseTopicCount() { } @Test - public void destroy() { - TenantsStats tenantsStats = new TenantsStats(reader); + public void close() { + TenantsStats tenantsStats = new TenantsStats(readerSupplier); tenantsStats.increaseTopicCount(tenantId, 1); tenantsStats.toggleMetering(true); assertGauge(tenantId, TenantMetric.MqttRetainNumGauge); assertGauge(tenantId, TenantMetric.MqttRetainSpaceGauge); - tenantsStats.destroy(); + tenantsStats.close(); assertNoGauge(tenantId, TenantMetric.MqttRetainNumGauge); assertNoGauge(tenantId, TenantMetric.MqttRetainSpaceGauge); } diff --git a/bifromq-sysprops/src/main/java/org/apache/bifromq/sysprops/props/DistWorkerFanOutSplitThreshold.java b/bifromq-sysprops/src/main/java/org/apache/bifromq/sysprops/props/DistWorkerFanOutSplitThreshold.java deleted file mode 100644 index a8b279d98..000000000 --- a/bifromq-sysprops/src/main/java/org/apache/bifromq/sysprops/props/DistWorkerFanOutSplitThreshold.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * 
Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.sysprops.props; - -import org.apache.bifromq.sysprops.BifroMQSysProp; -import org.apache.bifromq.sysprops.parser.IntegerParser; - -/** - * The fan-out routes threshold for splitting range in dist worker. - */ -public final class DistWorkerFanOutSplitThreshold extends BifroMQSysProp { - public static final DistWorkerFanOutSplitThreshold INSTANCE = new DistWorkerFanOutSplitThreshold(); - - private DistWorkerFanOutSplitThreshold() { - super("dist_worker_fanout_split_threshold", 100000, IntegerParser.POSITIVE); - } -} diff --git a/bifromq-sysprops/src/main/java/org/apache/bifromq/sysprops/props/InboxStoreLoadEstimationWindowSeconds.java b/bifromq-sysprops/src/main/java/org/apache/bifromq/sysprops/props/InboxStoreLoadEstimationWindowSeconds.java deleted file mode 100644 index 5ae5e4767..000000000 --- a/bifromq-sysprops/src/main/java/org/apache/bifromq/sysprops/props/InboxStoreLoadEstimationWindowSeconds.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.sysprops.props; - -import org.apache.bifromq.sysprops.BifroMQSysProp; -import org.apache.bifromq.sysprops.parser.LongParser; - -/** - * The window size in seconds for estimating the load of the inbox store. - */ -public final class InboxStoreLoadEstimationWindowSeconds extends BifroMQSysProp { - public static final InboxStoreLoadEstimationWindowSeconds INSTANCE = new InboxStoreLoadEstimationWindowSeconds(); - - private InboxStoreLoadEstimationWindowSeconds() { - super("inbox_store_load_estimation_window_seconds", 5L, LongParser.POSITIVE); - } -} diff --git a/bifromq-sysprops/src/main/java/org/apache/bifromq/sysprops/props/RetainStoreLoadEstimationWindowSeconds.java b/bifromq-sysprops/src/main/java/org/apache/bifromq/sysprops/props/RetainStoreLoadEstimationWindowSeconds.java deleted file mode 100644 index ee5cad8fc..000000000 --- a/bifromq-sysprops/src/main/java/org/apache/bifromq/sysprops/props/RetainStoreLoadEstimationWindowSeconds.java +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.sysprops.props; - -import org.apache.bifromq.sysprops.BifroMQSysProp; -import org.apache.bifromq.sysprops.parser.LongParser; - -/** - * The window size in seconds for estimating the load of the retain store. - */ -public final class RetainStoreLoadEstimationWindowSeconds extends BifroMQSysProp { - public static final RetainStoreLoadEstimationWindowSeconds INSTANCE = new RetainStoreLoadEstimationWindowSeconds(); - - private RetainStoreLoadEstimationWindowSeconds() { - super("retain_store_load_estimation_window_seconds", 5L, LongParser.POSITIVE); - } -} diff --git a/build/build-bifromq-starter/pom.xml b/build/build-bifromq-starter/pom.xml index 696a2c8de..3535b6b2d 100644 --- a/build/build-bifromq-starter/pom.xml +++ b/build/build-bifromq-starter/pom.xml @@ -31,6 +31,10 @@ build-bifromq-starter + + org.apache.bifromq + base-hookloader + commons-cli @@ -227,4 +231,4 @@ - \ No newline at end of file + diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/StandaloneConfigConsolidator.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/StandaloneConfigConsolidator.java index 58dd91b5d..d2c20e9df 100644 --- a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/StandaloneConfigConsolidator.java +++ 
b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/StandaloneConfigConsolidator.java @@ -14,34 +14,92 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.starter.config; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_CHECKPOINT_ROOT_DIR; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.DB_ROOT_DIR; + import com.google.common.base.Strings; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; import io.netty.handler.ssl.util.SelfSignedCertificate; import java.net.InetAddress; import java.net.NetworkInterface; +import java.nio.file.Paths; import java.security.cert.CertificateException; import java.util.Enumeration; +import java.util.HashMap; +import java.util.Map; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basehookloader.BaseHookLoader; +import org.apache.bifromq.basekv.localengine.StructUtil; +import org.apache.bifromq.basekv.localengine.spi.IKVEngineProvider; import org.apache.bifromq.starter.config.model.ClusterConfig; +import org.apache.bifromq.starter.config.model.EngineConfig; import org.apache.bifromq.starter.config.model.RPCConfig; import org.apache.bifromq.starter.config.model.SSLContextConfig; import org.apache.bifromq.starter.config.model.ServerSSLContextConfig; import org.apache.bifromq.starter.config.model.api.APIServerConfig; +import org.apache.bifromq.starter.config.model.dist.DistWorkerConfig; +import org.apache.bifromq.starter.config.model.inbox.InboxStoreConfig; import org.apache.bifromq.starter.config.model.mqtt.MQTTServerConfig; +import org.apache.bifromq.starter.config.model.retain.RetainStoreConfig; @Slf4j public class StandaloneConfigConsolidator { + private static final String 
USER_DIR_PROP = "user.dir"; + private static final String DATA_DIR_PROP = "DATA_DIR"; + private static final String DATA_PATH_ROOT = "dataPathRoot"; public static void consolidate(StandaloneConfig config) { consolidateClusterConfig(config); consolidateMQTTServerConfig(config); consolidateRPCConfig(config); consolidateAPIServerConfig(config); + consolidateEngineConfigs(config); + } + + private static void consolidateEngineConfigs(StandaloneConfig config) { + // Dist + DistWorkerConfig distWorker = config.getDistServiceConfig().getWorker(); + if (distWorker != null) { + consolidateEngine(distWorker.getDataEngineConfig(), true, "dist_data"); + consolidateEngine(distWorker.getWalEngineConfig(), false, "dist_wal"); + } + // Inbox + InboxStoreConfig inboxStore = config.getInboxServiceConfig().getStore(); + if (inboxStore != null) { + consolidateEngine(inboxStore.getDataEngineConfig(), true, "inbox_data"); + consolidateEngine(inboxStore.getWalEngineConfig(), false, "inbox_wal"); + } + // Retain + RetainStoreConfig retainStore = config.getRetainServiceConfig().getStore(); + if (retainStore != null) { + consolidateEngine(retainStore.getDataEngineConfig(), true, "retain_data"); + consolidateEngine(retainStore.getWalEngineConfig(), false, "retain_wal"); + } + } + + private static void consolidateEngine(EngineConfig cfg, boolean cpable, String name) { + if (cfg == null) { + cfg = new EngineConfig(); + } + String type = cfg.getType(); + if (Strings.isNullOrEmpty(type)) { + type = "rocksdb"; + cfg.setType(type); + } + IKVEngineProvider provider = findProvider(type); + Struct base = cpable ? 
provider.defaultsForCPable() : provider.defaultsForWALable(); + Map override = cfg; + Map merged = overlay(base, override); + derivePathsIfNeeded(cfg, merged, cpable, name); + cfg.clear(); + cfg.setProps(merged); } private static void consolidateClusterConfig(StandaloneConfig config) { @@ -179,4 +237,91 @@ private static ServerSSLContextConfig genSelfSignedServerCert() throws Certifica sslContextConfig.setKeyFile(selfCert.privateKey().getAbsolutePath()); return sslContextConfig; } + + private static IKVEngineProvider findProvider(String type) { + Map providers = BaseHookLoader.load(IKVEngineProvider.class); + IKVEngineProvider found = null; + for (IKVEngineProvider p : providers.values()) { + if (p.type().equalsIgnoreCase(type)) { + if (found != null) { + throw new IllegalStateException("Duplicate storage engine provider type: " + type); + } + found = p; + } + } + if (found == null) { + throw new IllegalArgumentException("Unsupported storage engine type: " + type); + } + return found; + } + + private static Map overlay(Struct base, Map override) { + Map merged = new HashMap<>(); + base.getFieldsMap().forEach((k, defVal) -> { + if (override.containsKey(k)) { + Value newVal = StructUtil.toValue(override.get(k)); + if (defVal.getKindCase() != newVal.getKindCase()) { + log.warn("Invalid engine config value type: {}, required={}", newVal.getKindCase(), + defVal.getKindCase()); + merged.put(k, normalizeNumber(StructUtil.fromValue(defVal))); + } else { + merged.put(k, normalizeNumber(StructUtil.fromValue(newVal))); + } + } else { + merged.put(k, normalizeNumber(StructUtil.fromValue(defVal))); + } + }); + override.forEach((k, v) -> { + if (!merged.containsKey(k) && !k.equals(DATA_PATH_ROOT)) { + log.warn("Unrecognized engine config: {}={}", k, v); + } + }); + return merged; + } + + private static Object normalizeNumber(Object v) { + if (v instanceof Number) { + double d = ((Number) v).doubleValue(); + if (Double.isFinite(d)) { + long l = (long) d; + if (d == l) { + if (l 
>= Integer.MIN_VALUE && l <= Integer.MAX_VALUE) { + return (int) l; + } + return l; + } + } + } + return v; + } + + private static void derivePathsIfNeeded(EngineConfig cfg, + Map completeConf, + boolean cpable, + String name) { + if (!"rocksdb".equalsIgnoreCase(cfg.getType())) { + return; + } + String dataRootPath; + if (cfg.containsKey(DATA_PATH_ROOT)) { + // fill back data path root + completeConf.put(DATA_PATH_ROOT, cfg.get(DATA_PATH_ROOT)); + if (Paths.get((String) cfg.get(DATA_PATH_ROOT)).isAbsolute()) { + dataRootPath = (String) cfg.get(DATA_PATH_ROOT); + } else { + String userDir = System.getProperty(USER_DIR_PROP); + String dataDir = System.getProperty(DATA_DIR_PROP, userDir); + dataRootPath = Paths.get(dataDir, (String) cfg.get(DATA_PATH_ROOT)).toAbsolutePath() + .toString(); + } + } else { + String userDir = System.getProperty(USER_DIR_PROP); + String dataDir = System.getProperty(DATA_DIR_PROP, userDir); + dataRootPath = Paths.get(dataDir).toAbsolutePath().toString(); + } + completeConf.put(DB_ROOT_DIR, Paths.get(dataRootPath, name).toString()); + if (cpable) { + completeConf.put(DB_CHECKPOINT_ROOT_DIR, Paths.get(dataRootPath, name + "_cp").toString()); + } + } } diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/EngineConfig.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/EngineConfig.java new file mode 100644 index 000000000..5649472d5 --- /dev/null +++ b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/EngineConfig.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.starter.config.model; + +import static org.apache.bifromq.basekv.localengine.StructUtil.fromMap; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.google.protobuf.Struct; +import java.util.Map; +import java.util.TreeMap; +import org.apache.bifromq.starter.config.model.serde.EngineConfigDeserializer; +import org.apache.bifromq.starter.config.model.serde.EngineConfigSerializer; + +@JsonSerialize(using = EngineConfigSerializer.class) +@JsonDeserialize(using = EngineConfigDeserializer.class) +public class EngineConfig extends TreeMap { + private String type = "rocksdb"; + + public String getType() { + return type; + } + + public EngineConfig setType(String type) { + this.type = type; + return this; + } + + public EngineConfig setProps(Map props) { + if (props != null) { + this.putAll(props); + } + return this; + } + + @JsonIgnore + public Struct toStruct() { + return fromMap(this); + } +} diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/InMemEngineConfig.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/InMemEngineConfig.java deleted file mode 100644 index 57707742e..000000000 --- a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/InMemEngineConfig.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or 
more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.starter.config.model; - -public class InMemEngineConfig extends StorageEngineConfig { -} diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/RocksDBEngineConfig.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/RocksDBEngineConfig.java deleted file mode 100644 index ca3baa870..000000000 --- a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/RocksDBEngineConfig.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.bifromq.starter.config.model; - -import static java.lang.Math.max; - -import lombok.Getter; -import lombok.Setter; -import lombok.experimental.Accessors; -import org.apache.bifromq.baseenv.EnvProvider; -import org.rocksdb.util.SizeUnit; - -@Getter -@Setter -@Accessors(chain = true) -public class RocksDBEngineConfig extends StorageEngineConfig { - private String dataPathRoot = ""; - private boolean manualCompaction = false; - private int compactMinTombstoneKeys = 200000; - private int compactMinTombstoneRanges = 100000; - private double compactTombstoneRatio = 0.3; // 30% - private boolean asyncWALFlush = true; // only work for wal engine - private boolean fsyncWAL = false; // only work for wal engine - private long blockCacheSize = 32 * SizeUnit.MB; - private long writeBufferSize = 128 * SizeUnit.MB; - private int maxWriteBufferNumber = 6; - private int minWriteBufferNumberToMerge = 2; - private long minBlobSize = 2 * SizeUnit.KB; - private int increaseParallelism = max(EnvProvider.INSTANCE.availableProcessors(), 2); - private int maxBackgroundJobs = max(EnvProvider.INSTANCE.availableProcessors(), 2); - private int level0FileNumCompactionTrigger = 8; - private int level0SlowdownWritesTrigger = 20; - private int level0StopWritesTrigger = 24; - private long maxBytesForLevelBase = writeBufferSize * minWriteBufferNumberToMerge * level0FileNumCompactionTrigger; - private long targetFileSizeBase = maxBytesForLevelBase / 10; -} diff --git a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngineConfigurator.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/SplitHinterOptions.java similarity index 53% rename from base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngineConfigurator.java rename to 
build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/SplitHinterOptions.java index 8df761e10..90d08495b 100644 --- a/base-kv/base-kv-local-engine/src/main/java/org/apache/bifromq/basekv/localengine/rocksdb/RocksDBCPableKVEngineConfigurator.java +++ b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/SplitHinterOptions.java @@ -17,27 +17,26 @@ * under the License. */ -package org.apache.bifromq.basekv.localengine.rocksdb; +package org.apache.bifromq.starter.config.model; -import org.apache.bifromq.basekv.localengine.ICPableKVEngineConfigurator; +import com.fasterxml.jackson.annotation.JsonSetter; +import com.fasterxml.jackson.annotation.Nulls; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.google.protobuf.Struct; +import java.util.HashMap; +import java.util.Map; import lombok.Getter; import lombok.Setter; -import lombok.experimental.Accessors; -import lombok.experimental.SuperBuilder; -import org.rocksdb.DBOptions; -import org.rocksdb.DBOptionsInterface; +import org.apache.bifromq.starter.config.model.serde.StructMapDeserializer; +import org.apache.bifromq.starter.config.model.serde.StructMapSerializer; -@Accessors(chain = true, fluent = true) @Getter @Setter -@SuperBuilder(toBuilder = true) -public final class RocksDBCPableKVEngineConfigurator - extends RocksDBKVEngineConfigurator implements ICPableKVEngineConfigurator { - private String dbCheckpointRootDir; - - @Override - protected void configDBOptions(DBOptionsInterface targetOption) { - super.configDBOptions(targetOption); - targetOption.setRecycleLogFileNum(0); - } +public class SplitHinterOptions { + @JsonSetter(nulls = Nulls.SKIP) + @JsonSerialize(using = StructMapSerializer.class) + @JsonDeserialize(using = StructMapDeserializer.class) + private Map hinters = new HashMap<>(); } + diff --git 
a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/dist/DistWorkerConfig.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/dist/DistWorkerConfig.java index cae471974..f0a21d058 100644 --- a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/dist/DistWorkerConfig.java +++ b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/dist/DistWorkerConfig.java @@ -19,6 +19,10 @@ package org.apache.bifromq.starter.config.model.dist; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.COMPACT_MIN_TOMBSTONE_KEYS; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.COMPACT_MIN_TOMBSTONE_RANGES; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.MANUAL_COMPACTION; + import com.fasterxml.jackson.annotation.JsonMerge; import com.fasterxml.jackson.annotation.JsonSetter; import com.fasterxml.jackson.annotation.Nulls; @@ -29,8 +33,8 @@ import lombok.Getter; import lombok.Setter; import org.apache.bifromq.starter.config.model.BalancerOptions; -import org.apache.bifromq.starter.config.model.RocksDBEngineConfig; -import org.apache.bifromq.starter.config.model.StorageEngineConfig; +import org.apache.bifromq.starter.config.model.EngineConfig; +import org.apache.bifromq.starter.config.model.SplitHinterOptions; @Getter @Setter @@ -40,21 +44,36 @@ public class DistWorkerConfig { private int workerThreads = 0; private int tickerThreads = Math.max(1, Runtime.getRuntime().availableProcessors() / 20); private int maxWALFetchSize = 10 * 1024 * 1024; // 10MB - private int compactWALThreshold = 2500; + private int compactWALThreshold = 256 * 1024 * 1024; private int minGCIntervalSeconds = 30; // every 30 s private int maxGCIntervalSeconds = 24 * 3600; // every day @JsonSetter(nulls = Nulls.SKIP) @JsonMerge - private StorageEngineConfig dataEngineConfig = new 
RocksDBEngineConfig(); + private EngineConfig dataEngineConfig = new EngineConfig() + .setType("rocksdb") + .setProps(new HashMap<>() { + { + put(MANUAL_COMPACTION, true); + put(COMPACT_MIN_TOMBSTONE_KEYS, 2500); + put(COMPACT_MIN_TOMBSTONE_RANGES, 2); + } + }); @JsonSetter(nulls = Nulls.SKIP) @JsonMerge - private StorageEngineConfig walEngineConfig = new RocksDBEngineConfig() - .setManualCompaction(true) - .setCompactMinTombstoneKeys(2500) - .setCompactMinTombstoneRanges(2); + private EngineConfig walEngineConfig = new EngineConfig() + .setType("rocksdb") + .setProps(new HashMap<>() { + { + put(MANUAL_COMPACTION, true); + put(COMPACT_MIN_TOMBSTONE_KEYS, 2500); + put(COMPACT_MIN_TOMBSTONE_RANGES, 2); + } + }); @JsonSetter(nulls = Nulls.SKIP) private BalancerOptions balanceConfig = new BalancerOptions(); @JsonSetter(nulls = Nulls.SKIP) + private SplitHinterOptions splitHinterConfig = new SplitHinterOptions(); + @JsonSetter(nulls = Nulls.SKIP) private Map attributes = new HashMap<>(); public DistWorkerConfig() { @@ -65,5 +84,15 @@ public DistWorkerConfig() { .putFields("votersPerRange", Value.newBuilder().setNumberValue(3).build()) .putFields("learnersPerRange", Value.newBuilder().setNumberValue(-1).build()) .build()); + + splitHinterConfig.getHinters().put("org.apache.bifromq.dist.worker.hinter.FanoutSplitHinterFactory", + Struct.newBuilder() + .putFields("splitThreshold", Value.newBuilder().setNumberValue(100000).build()) + .build()); + splitHinterConfig.getHinters() + .put("org.apache.bifromq.basekv.store.range.hinter.MutationKVLoadBasedSplitHinterFactory", + Struct.newBuilder() + .putFields("windowSeconds", Value.newBuilder().setNumberValue(5).build()) + .build()); } } diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/inbox/InboxStoreClientConfig.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/inbox/InboxStoreClientConfig.java index b1e0c6d45..1da8f39aa 100644 --- 
a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/inbox/InboxStoreClientConfig.java +++ b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/inbox/InboxStoreClientConfig.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.starter.config.model.inbox; diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/inbox/InboxStoreConfig.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/inbox/InboxStoreConfig.java index 627a51764..64078075b 100644 --- a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/inbox/InboxStoreConfig.java +++ b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/inbox/InboxStoreConfig.java @@ -19,6 +19,10 @@ package org.apache.bifromq.starter.config.model.inbox; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.COMPACT_MIN_TOMBSTONE_KEYS; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.COMPACT_MIN_TOMBSTONE_RANGES; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.MANUAL_COMPACTION; + import com.fasterxml.jackson.annotation.JsonMerge; import com.fasterxml.jackson.annotation.JsonSetter; import com.fasterxml.jackson.annotation.Nulls; @@ -30,34 +34,50 @@ import lombok.Setter; import org.apache.bifromq.baseenv.EnvProvider; import org.apache.bifromq.starter.config.model.BalancerOptions; -import org.apache.bifromq.starter.config.model.RocksDBEngineConfig; -import org.apache.bifromq.starter.config.model.StorageEngineConfig; +import org.apache.bifromq.starter.config.model.EngineConfig; +import 
org.apache.bifromq.starter.config.model.SplitHinterOptions; @Getter @Setter public class InboxStoreConfig { + private boolean enable = true; // 0 means use calling thread private int workerThreads = 0; private int tickerThreads = Math.max(1, Runtime.getRuntime().availableProcessors() / 20); - private int queryPipelinePerStore = 100; private int maxWALFetchSize = -1; // no limit - private int compactWALThreshold = 10000; + private int compactWALThreshold = 256 * 1024 * 1024; // size threshold in bytes private int expireRateLimit = 1000; private int gcIntervalSeconds = 600; @JsonSetter(nulls = Nulls.SKIP) @JsonMerge - private StorageEngineConfig dataEngineConfig = new RocksDBEngineConfig(); + private EngineConfig dataEngineConfig = new EngineConfig() + .setType("rocksdb") + .setProps(new HashMap<>() { + { + put(MANUAL_COMPACTION, true); + put(COMPACT_MIN_TOMBSTONE_KEYS, 50000); + put(COMPACT_MIN_TOMBSTONE_RANGES, 100); + } + }); @JsonSetter(nulls = Nulls.SKIP) @JsonMerge - private StorageEngineConfig walEngineConfig = new RocksDBEngineConfig() - .setManualCompaction(true) - .setCompactMinTombstoneKeys(2500) - .setCompactMinTombstoneRanges(2); + private EngineConfig walEngineConfig = new EngineConfig() + .setType("rocksdb") + .setProps(new HashMap<>() { + { + put(MANUAL_COMPACTION, true); + put(COMPACT_MIN_TOMBSTONE_KEYS, 2500); + put(COMPACT_MIN_TOMBSTONE_RANGES, 2); + } + }); @JsonSetter(nulls = Nulls.SKIP) @JsonMerge private BalancerOptions balanceConfig = new BalancerOptions(); @JsonSetter(nulls = Nulls.SKIP) + @JsonMerge + private SplitHinterOptions splitHinterConfig = new SplitHinterOptions(); + @JsonSetter(nulls = Nulls.SKIP) private Map attributes = new HashMap<>(); public InboxStoreConfig() { @@ -75,5 +95,11 @@ public InboxStoreConfig() { .build()); balanceConfig.getBalancers().put("org.apache.bifromq.inbox.store.balance.RangeLeaderBalancerFactory", Struct.getDefaultInstance()); + + splitHinterConfig.getHinters() + 
.put("org.apache.bifromq.basekv.store.range.hinter.MutationKVLoadBasedSplitHinterFactory", + Struct.newBuilder() + .putFields("windowSeconds", Value.newBuilder().setNumberValue(5).build()) + .build()); } } diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/retain/RetainStoreConfig.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/retain/RetainStoreConfig.java index e63016c07..11c35ddaa 100644 --- a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/retain/RetainStoreConfig.java +++ b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/retain/RetainStoreConfig.java @@ -19,6 +19,10 @@ package org.apache.bifromq.starter.config.model.retain; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.COMPACT_MIN_TOMBSTONE_KEYS; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.COMPACT_MIN_TOMBSTONE_RANGES; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.MANUAL_COMPACTION; + import com.fasterxml.jackson.annotation.JsonMerge; import com.fasterxml.jackson.annotation.JsonSetter; import com.fasterxml.jackson.annotation.Nulls; @@ -30,8 +34,8 @@ import lombok.Setter; import org.apache.bifromq.baseenv.EnvProvider; import org.apache.bifromq.starter.config.model.BalancerOptions; -import org.apache.bifromq.starter.config.model.RocksDBEngineConfig; -import org.apache.bifromq.starter.config.model.StorageEngineConfig; +import org.apache.bifromq.starter.config.model.EngineConfig; +import org.apache.bifromq.starter.config.model.SplitHinterOptions; @Getter @Setter @@ -40,23 +44,38 @@ public class RetainStoreConfig { // 0 means use calling thread private int workerThreads = 0; private int tickerThreads = Math.max(1, Runtime.getRuntime().availableProcessors() / 20); - private int queryPipelinePerStore = 100; private int maxWALFetchSize = 50 * 1024 * 1024; 
// 50MB - private int compactWALThreshold = 2500; + private int compactWALThreshold = 256 * 1024 * 1024; private int gcIntervalSeconds = 600; @JsonSetter(nulls = Nulls.SKIP) @JsonMerge - private StorageEngineConfig dataEngineConfig = new RocksDBEngineConfig(); + private EngineConfig dataEngineConfig = new EngineConfig() + .setType("rocksdb") + .setProps(new HashMap<>() { + { + put(MANUAL_COMPACTION, true); + put(COMPACT_MIN_TOMBSTONE_KEYS, 2500); + put(COMPACT_MIN_TOMBSTONE_RANGES, 2); + } + }); @JsonSetter(nulls = Nulls.SKIP) @JsonMerge - private StorageEngineConfig walEngineConfig = new RocksDBEngineConfig() - .setManualCompaction(true) - .setCompactMinTombstoneKeys(2500) - .setCompactMinTombstoneRanges(2); + private EngineConfig walEngineConfig = new EngineConfig() + .setType("rocksdb") + .setProps(new HashMap<>() { + { + put(MANUAL_COMPACTION, true); + put(COMPACT_MIN_TOMBSTONE_KEYS, 2500); + put(COMPACT_MIN_TOMBSTONE_RANGES, 2); + } + }); @JsonSetter(nulls = Nulls.SKIP) @JsonMerge private BalancerOptions balanceConfig = new BalancerOptions(); @JsonSetter(nulls = Nulls.SKIP) + @JsonMerge + private SplitHinterOptions splitHinterConfig = new SplitHinterOptions(); + @JsonSetter(nulls = Nulls.SKIP) private Map attributes = new HashMap<>(); public RetainStoreConfig() { @@ -73,5 +92,11 @@ public RetainStoreConfig() { .putFields("maxIODensity", Value.newBuilder().setNumberValue(100).build()) .putFields("ioNanosLimit", Value.newBuilder().setNumberValue(30_000).build()) .build()); + + splitHinterConfig.getHinters() + .put("org.apache.bifromq.basekv.store.range.hinter.MutationKVLoadBasedSplitHinterFactory", + Struct.newBuilder() + .putFields("windowSeconds", Value.newBuilder().setNumberValue(5).build()) + .build()); } } diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/serde/EngineConfigDeserializer.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/serde/EngineConfigDeserializer.java new 
file mode 100644 index 000000000..1c501c16e --- /dev/null +++ b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/serde/EngineConfigDeserializer.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.starter.config.model.serde; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; +import java.io.IOException; +import java.util.Iterator; +import java.util.Map; +import org.apache.bifromq.starter.config.model.EngineConfig; + +public class EngineConfigDeserializer extends StdDeserializer { + public EngineConfigDeserializer() { + super(EngineConfig.class); + } + + @Override + public EngineConfig deserialize(JsonParser p, DeserializationContext ctxt) throws IOException { + JsonNode node = p.getCodec().readTree(p); + EngineConfig cfg = new EngineConfig(); + if (node.has("type") && node.get("type").isTextual()) { + cfg.setType(node.get("type").asText()); + } + Iterator> it = node.fields(); + while (it.hasNext()) { + Map.Entry e = it.next(); + String k = e.getKey(); + if ("type".equals(k)) { + continue; + } + JsonNode v = e.getValue(); + cfg.put(k, toJava(v)); + } + return cfg; + } + + // Support in-place merge when @JsonMerge is present on the field + @Override + public EngineConfig deserialize(JsonParser p, DeserializationContext ctxt, EngineConfig intoValue) + throws IOException { + if (intoValue == null) { + // Fallback to regular deserialization + return deserialize(p, ctxt); + } + JsonNode node = p.getCodec().readTree(p); + if (node.has("type") && node.get("type").isTextual()) { + // Only override type when provided + intoValue.setType(node.get("type").asText()); + } + Iterator> it = node.fields(); + while (it.hasNext()) { + Map.Entry e = it.next(); + String k = e.getKey(); + if ("type".equals(k)) { + continue; + } + JsonNode v = e.getValue(); + // Put to keep provided key and preserve others untouched + intoValue.put(k, toJava(v)); + } + return intoValue; + } + + private Object toJava(JsonNode v) { + if (v == null || v.isNull()) { + return null; + } + if 
(v.isBoolean()) { + return v.booleanValue(); + } + if (v.isInt()) { + return v.intValue(); + } + if (v.isLong()) { + return v.longValue(); + } + if (v.isFloatingPointNumber()) { + return v.doubleValue(); + } + if (v.isTextual()) { + return v.textValue(); + } + // fallback to tree for complex types + return v.toString(); + } +} diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/serde/EngineConfigSerializer.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/serde/EngineConfigSerializer.java new file mode 100644 index 000000000..c40766acf --- /dev/null +++ b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/config/model/serde/EngineConfigSerializer.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.starter.config.model.serde; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.ser.std.StdSerializer; +import java.io.IOException; +import java.util.Map; +import org.apache.bifromq.starter.config.model.EngineConfig; + +// Custom serializer to include standalone 'type' and map entries together +public class EngineConfigSerializer extends StdSerializer { + public EngineConfigSerializer() { + super(EngineConfig.class); + } + + @Override + public void serialize(EngineConfig value, JsonGenerator gen, SerializerProvider provider) throws IOException { + gen.writeStartObject(); + String type = value.getType(); + gen.writeStringField("type", type); + // write map entries following TreeMap order, skip 'type' key if present + for (Map.Entry e : value.entrySet()) { + String k = e.getKey(); + if ("type".equals(k)) { + continue; + } + writeField(gen, k, e.getValue()); + } + gen.writeEndObject(); + } + + private void writeField(JsonGenerator gen, String key, Object v) throws IOException { + if (v == null) { + gen.writeNullField(key); + return; + } + if (v instanceof Boolean b) { + gen.writeBooleanField(key, b); + return; + } + if (v instanceof Number n) { + double d = n.doubleValue(); + long l = (long) d; + if (Double.isFinite(d) && d == l) { + if (l >= Integer.MIN_VALUE && l <= Integer.MAX_VALUE) { + gen.writeNumberField(key, (int) l); + } else { + gen.writeNumberField(key, l); + } + } else { + gen.writeNumberField(key, d); + } + return; + } + gen.writeObjectField(key, v); + } +} + diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/CoreServiceModule.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/CoreServiceModule.java index a4c5f3be8..95f1b7759 100644 --- a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/CoreServiceModule.java +++ 
b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/CoreServiceModule.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.starter.module; @@ -45,7 +45,7 @@ import org.apache.bifromq.sessiondict.client.ISessionDictClient; import org.apache.bifromq.starter.config.StandaloneConfig; import org.apache.bifromq.starter.config.model.dist.DistWorkerClientConfig; -import org.apache.bifromq.starter.config.model.inbox.InboxStoreConfig; +import org.apache.bifromq.starter.config.model.inbox.InboxStoreClientConfig; import org.apache.bifromq.starter.config.model.retain.RetainStoreClientConfig; public class CoreServiceModule extends AbstractModule { @@ -277,15 +277,15 @@ private InboxStoreClientProvider(StandaloneConfig config, @Override public IBaseKVStoreClient share() { - InboxStoreConfig storeConfig = config.getInboxServiceConfig().getStore(); + InboxStoreClientConfig storeClientConfig = config.getInboxServiceConfig().getStoreClient(); return IBaseKVStoreClient.newBuilder() .clusterId(IInboxStore.CLUSTER_NAME) .trafficService(trafficService) .metaService(metaService) - .workerThreads(storeConfig.getWorkerThreads()) + .workerThreads(storeClientConfig.getWorkerThreads()) .eventLoopGroup(eventLoopGroup) .sslContext(rpcClientSSLContext) - .queryPipelinesPerStore(storeConfig.getQueryPipelinePerStore()) + .queryPipelinesPerStore(storeClientConfig.getQueryPipelinePerStore()) .build(); } } diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/DistServiceModule.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/DistServiceModule.java index 80fb4a6fe..5ce8865d6 100644 --- a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/DistServiceModule.java +++ 
b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/DistServiceModule.java @@ -19,9 +19,6 @@ package org.apache.bifromq.starter.module; -import static org.apache.bifromq.starter.module.EngineConfUtil.buildDataEngineConf; -import static org.apache.bifromq.starter.module.EngineConfUtil.buildWALEngineConf; - import com.google.inject.AbstractModule; import com.google.inject.Inject; import com.google.inject.Key; @@ -48,7 +45,6 @@ import org.apache.bifromq.starter.config.StandaloneConfig; import org.apache.bifromq.starter.config.model.dist.DistServerConfig; import org.apache.bifromq.starter.config.model.dist.DistWorkerConfig; -import org.apache.bifromq.sysprops.props.DistWorkerLoadEstimationWindowSeconds; public class DistServiceModule extends AbstractModule { @Override @@ -122,10 +118,11 @@ public Optional get() { .setKvRangeOptions(new KVRangeOptions() .setMaxWALFatchBatchSize(workerConfig.getMaxWALFetchSize()) .setCompactWALThreshold(workerConfig.getCompactWALThreshold())) - .setDataEngineConfigurator(buildDataEngineConf(workerConfig - .getDataEngineConfig(), "dist_data")) - .setWalEngineConfigurator(buildWALEngineConf(workerConfig - .getWalEngineConfig(), "dist_wal"))) + .setSplitHinterFactoryConfig(workerConfig.getSplitHinterConfig().getHinters()) + .setDataEngineType(workerConfig.getDataEngineConfig().getType()) + .setDataEngineConf(workerConfig.getDataEngineConfig().toStruct()) + .setWalEngineType(workerConfig.getWalEngineConfig().getType()) + .setWalEngineConf(workerConfig.getWalEngineConfig().toStruct())) .minGCInterval(Duration.ofSeconds(workerConfig.getMinGCIntervalSeconds())) .maxGCInterval(Duration.ofSeconds(workerConfig.getMaxGCIntervalSeconds())) .bootstrapDelay(Duration.ofMillis(workerConfig.getBalanceConfig().getBootstrapDelayInMS())) @@ -134,7 +131,6 @@ public Optional get() { .balancerFactoryConfig(workerConfig.getBalanceConfig().getBalancers()) .subBrokerManager(injector.getInstance(ISubBrokerManager.class)) 
.settingProvider(injector.getInstance(SettingProviderManager.class)) - .loadEstimateWindow(Duration.ofSeconds(DistWorkerLoadEstimationWindowSeconds.INSTANCE.get())) .attributes(workerConfig.getAttributes()) .build()); } diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/EngineConfUtil.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/EngineConfUtil.java deleted file mode 100644 index 8c171695b..000000000 --- a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/EngineConfUtil.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.bifromq.starter.module; - -import java.nio.file.Path; -import java.nio.file.Paths; -import org.apache.bifromq.basekv.localengine.ICPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.IWALableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.memory.InMemKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBCPableKVEngineConfigurator; -import org.apache.bifromq.basekv.localengine.rocksdb.RocksDBWALableKVEngineConfigurator; -import org.apache.bifromq.starter.config.model.InMemEngineConfig; -import org.apache.bifromq.starter.config.model.RocksDBEngineConfig; -import org.apache.bifromq.starter.config.model.StorageEngineConfig; - -public class EngineConfUtil { - public static final String USER_DIR_PROP = "user.dir"; - public static final String DATA_DIR_PROP = "DATA_DIR"; - - public static ICPableKVEngineConfigurator buildDataEngineConf(StorageEngineConfig config, String name) { - if (config instanceof InMemEngineConfig) { - return InMemKVEngineConfigurator.builder() - .build(); - } else { - Path dataRootDir; - Path dataCheckpointRootDir; - RocksDBEngineConfig rocksDBConfig = (RocksDBEngineConfig) config; - if (Paths.get(rocksDBConfig.getDataPathRoot()).isAbsolute()) { - dataRootDir = Paths.get(rocksDBConfig.getDataPathRoot(), name); - dataCheckpointRootDir = - Paths.get(rocksDBConfig.getDataPathRoot(), name + "_cp"); - } else { - String userDir = System.getProperty(USER_DIR_PROP); - String dataDir = System.getProperty(DATA_DIR_PROP, userDir); - dataRootDir = Paths.get(dataDir, rocksDBConfig.getDataPathRoot(), name); - dataCheckpointRootDir = - Paths.get(dataDir, rocksDBConfig.getDataPathRoot(), name + "_cp"); - } - return RocksDBCPableKVEngineConfigurator.builder() - .dbRootDir(dataRootDir.toString()) - .dbCheckpointRootDir(dataCheckpointRootDir.toString()) - .heuristicCompaction(rocksDBConfig.isManualCompaction()) - 
.compactMinTombstoneKeys(rocksDBConfig.getCompactMinTombstoneKeys()) - .compactMinTombstoneRanges(rocksDBConfig.getCompactMinTombstoneRanges()) - .compactTombstoneKeysRatio(rocksDBConfig.getCompactTombstoneRatio()) - - .blockCacheSize(rocksDBConfig.getBlockCacheSize()) - .writeBufferSize(rocksDBConfig.getWriteBufferSize()) - .maxWriteBufferNumber(rocksDBConfig.getMaxWriteBufferNumber()) - .minWriteBufferNumberToMerge(rocksDBConfig.getMinWriteBufferNumberToMerge()) - .minBlobSize(rocksDBConfig.getMinBlobSize()) - .increaseParallelism(rocksDBConfig.getIncreaseParallelism()) - .maxBackgroundJobs(rocksDBConfig.getMaxBackgroundJobs()) - .level0FileNumCompactionTrigger(rocksDBConfig.getLevel0FileNumCompactionTrigger()) - .level0SlowdownWritesTrigger(rocksDBConfig.getLevel0SlowdownWritesTrigger()) - .level0StopWritesTrigger(rocksDBConfig.getLevel0StopWritesTrigger()) - .maxBytesForLevelBase(rocksDBConfig.getMaxBytesForLevelBase()) - .targetFileSizeBase(rocksDBConfig.getTargetFileSizeBase()) - .build(); - } - } - - public static IWALableKVEngineConfigurator buildWALEngineConf(StorageEngineConfig config, String name) { - if (config instanceof InMemEngineConfig) { - return InMemKVEngineConfigurator.builder() - .build(); - } else { - Path dataRootDir; - RocksDBEngineConfig rocksDBConfig = (RocksDBEngineConfig) config; - if (Paths.get(rocksDBConfig.getDataPathRoot()).isAbsolute()) { - dataRootDir = Paths.get(rocksDBConfig.getDataPathRoot(), name); - } else { - String userDir = System.getProperty(USER_DIR_PROP); - String dataDir = System.getProperty(DATA_DIR_PROP, userDir); - dataRootDir = Paths.get(dataDir, rocksDBConfig.getDataPathRoot(), name); - } - return RocksDBWALableKVEngineConfigurator.builder() - .dbRootDir(dataRootDir.toString()) - .heuristicCompaction(rocksDBConfig.isManualCompaction()) - .compactMinTombstoneKeys(rocksDBConfig.getCompactMinTombstoneKeys()) - .compactMinTombstoneRanges(rocksDBConfig.getCompactMinTombstoneRanges()) - 
.compactTombstoneKeysRatio(rocksDBConfig.getCompactTombstoneRatio()) - .asyncWALFlush(rocksDBConfig.isAsyncWALFlush()) - .fsyncWAL(rocksDBConfig.isFsyncWAL()) - - .blockCacheSize(rocksDBConfig.getBlockCacheSize()) - .writeBufferSize(rocksDBConfig.getWriteBufferSize()) - .maxWriteBufferNumber(rocksDBConfig.getMaxWriteBufferNumber()) - .minWriteBufferNumberToMerge(rocksDBConfig.getMinWriteBufferNumberToMerge()) - .minBlobSize(rocksDBConfig.getMinBlobSize()) - .increaseParallelism(rocksDBConfig.getIncreaseParallelism()) - .maxBackgroundJobs(rocksDBConfig.getMaxBackgroundJobs()) - .level0FileNumCompactionTrigger(rocksDBConfig.getLevel0FileNumCompactionTrigger()) - .level0SlowdownWritesTrigger(rocksDBConfig.getLevel0SlowdownWritesTrigger()) - .level0StopWritesTrigger(rocksDBConfig.getLevel0StopWritesTrigger()) - .maxBytesForLevelBase(rocksDBConfig.getMaxBytesForLevelBase()) - .targetFileSizeBase(rocksDBConfig.getTargetFileSizeBase()) - .build(); - } - } -} diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/InboxServiceModule.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/InboxServiceModule.java index b161e6dc9..0f36b5bd4 100644 --- a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/InboxServiceModule.java +++ b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/InboxServiceModule.java @@ -14,14 +14,11 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.starter.module; -import static org.apache.bifromq.starter.module.EngineConfUtil.buildDataEngineConf; -import static org.apache.bifromq.starter.module.EngineConfUtil.buildWALEngineConf; - import com.google.inject.AbstractModule; import com.google.inject.Inject; import com.google.inject.Key; @@ -50,7 +47,6 @@ import org.apache.bifromq.starter.config.StandaloneConfig; import org.apache.bifromq.starter.config.model.inbox.InboxServerConfig; import org.apache.bifromq.starter.config.model.inbox.InboxStoreConfig; -import org.apache.bifromq.sysprops.props.InboxStoreLoadEstimationWindowSeconds; public class InboxServiceModule extends AbstractModule { @Override @@ -123,7 +119,6 @@ public Optional get() { .workerThreads(storeConfig.getWorkerThreads()) .bgTaskExecutor( injector.getInstance(Key.get(ScheduledExecutorService.class, Names.named("bgTaskScheduler")))) - .loadEstimateWindow(Duration.ofSeconds(InboxStoreLoadEstimationWindowSeconds.INSTANCE.get())) .expireRateLimit(storeConfig.getExpireRateLimit()) .gcInterval( Duration.ofSeconds(storeConfig.getGcIntervalSeconds())) @@ -136,11 +131,13 @@ public Optional get() { .storeOptions(new KVRangeStoreOptions() .setKvRangeOptions(new KVRangeOptions() .setMaxWALFatchBatchSize(storeConfig.getMaxWALFetchSize()) - .setCompactWALThreshold(storeConfig - .getCompactWALThreshold()) + .setCompactWALThreshold(storeConfig.getCompactWALThreshold()) .setEnableLoadEstimation(true)) - .setDataEngineConfigurator(buildDataEngineConf(storeConfig.getDataEngineConfig(), "inbox_data")) - .setWalEngineConfigurator(buildWALEngineConf(storeConfig.getWalEngineConfig(), "inbox_wal"))) + .setSplitHinterFactoryConfig(storeConfig.getSplitHinterConfig().getHinters()) + .setDataEngineType(storeConfig.getDataEngineConfig().getType()) + .setDataEngineConf(storeConfig.getDataEngineConfig().toStruct()) + .setWalEngineType(storeConfig.getWalEngineConfig().getType()) + .setWalEngineConf(storeConfig.getWalEngineConfig().toStruct())) 
.attributes(storeConfig.getAttributes()) .build()); } diff --git a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/RetainServiceModule.java b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/RetainServiceModule.java index f964bf63b..1e51351a3 100644 --- a/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/RetainServiceModule.java +++ b/build/build-bifromq-starter/src/main/java/org/apache/bifromq/starter/module/RetainServiceModule.java @@ -14,14 +14,11 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.starter.module; -import static org.apache.bifromq.starter.module.EngineConfUtil.buildDataEngineConf; -import static org.apache.bifromq.starter.module.EngineConfUtil.buildWALEngineConf; - import com.google.inject.AbstractModule; import com.google.inject.Inject; import com.google.inject.Key; @@ -46,7 +43,6 @@ import org.apache.bifromq.starter.config.StandaloneConfig; import org.apache.bifromq.starter.config.model.retain.RetainServerConfig; import org.apache.bifromq.starter.config.model.retain.RetainStoreConfig; -import org.apache.bifromq.sysprops.props.RetainStoreLoadEstimationWindowSeconds; public class RetainServiceModule extends AbstractModule { @Override @@ -118,14 +114,16 @@ public Optional get() { .zombieProbeDelay(Duration.ofMillis(storeConfig.getBalanceConfig().getZombieProbeDelayInMS())) .balancerRetryDelay(Duration.ofMillis(storeConfig.getBalanceConfig().getRetryDelayInMS())) .balancerFactoryConfig(storeConfig.getBalanceConfig().getBalancers()) - .loadEstimateWindow(Duration.ofSeconds(RetainStoreLoadEstimationWindowSeconds.INSTANCE.get())) .gcInterval(Duration.ofSeconds(storeConfig.getGcIntervalSeconds())) .storeOptions(new KVRangeStoreOptions() .setKvRangeOptions(new 
KVRangeOptions() .setMaxWALFatchBatchSize(storeConfig.getMaxWALFetchSize()) .setCompactWALThreshold(storeConfig.getCompactWALThreshold())) - .setDataEngineConfigurator(buildDataEngineConf(storeConfig.getDataEngineConfig(), "retain_data")) - .setWalEngineConfigurator(buildWALEngineConf(storeConfig.getWalEngineConfig(), "retain_wal"))) + .setSplitHinterFactoryConfig(storeConfig.getSplitHinterConfig().getHinters()) + .setDataEngineType(storeConfig.getDataEngineConfig().getType()) + .setDataEngineConf(storeConfig.getDataEngineConfig().toStruct()) + .setWalEngineType(storeConfig.getWalEngineConfig().getType()) + .setWalEngineConf(storeConfig.getWalEngineConfig().toStruct())) .attributes(storeConfig.getAttributes()) .build()); } diff --git a/build/build-bifromq-starter/src/test/java/org/apache/bifromq/starter/config/EngineConfigMergeTest.java b/build/build-bifromq-starter/src/test/java/org/apache/bifromq/starter/config/EngineConfigMergeTest.java new file mode 100644 index 000000000..8366bcde4 --- /dev/null +++ b/build/build-bifromq-starter/src/test/java/org/apache/bifromq/starter/config/EngineConfigMergeTest.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.bifromq.starter.config; + +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.COMPACT_MIN_TOMBSTONE_KEYS; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.COMPACT_MIN_TOMBSTONE_RANGES; +import static org.apache.bifromq.basekv.localengine.rocksdb.RocksDBDefaultConfigs.MANUAL_COMPACTION; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + +import java.io.File; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import org.apache.bifromq.starter.config.model.EngineConfig; +import org.apache.bifromq.starter.config.model.dist.DistWorkerConfig; +import org.apache.bifromq.starter.utils.ConfigFileUtil; +import org.testng.annotations.Test; + +public class EngineConfigMergeTest { + // Verify EngineConfig fields merge instead of overwrite + @Test + public void testInboxEngineConfigMerge() throws Exception { + String yaml = "inboxServiceConfig:\n" + + " store:\n" + + " dataEngineConfig:\n" + + " type: \"rocksdb\"\n" + + " compactMinTombstoneKeys: 50000000\n" + + " walEngineConfig:\n" + + " type: \"rocksdb\"\n" + + " compactMinTombstoneKeys: 50000000\n"; + + File f = File.createTempFile("bifromq-starter-config", ".yaml"); + Files.writeString(f.toPath(), yaml, StandardCharsets.UTF_8); + + StandaloneConfig cfg = ConfigFileUtil.build(f, StandaloneConfig.class); + + EngineConfig data = cfg.getInboxServiceConfig().getStore().getDataEngineConfig(); + assertEquals(data.getType(), "rocksdb"); + assertEquals(((Number) data.get(COMPACT_MIN_TOMBSTONE_KEYS)).longValue(), 50000000L); + assertTrue((Boolean) data.get(MANUAL_COMPACTION)); + assertEquals(((Number) data.get(COMPACT_MIN_TOMBSTONE_RANGES)).intValue(), 100); + + EngineConfig wal = cfg.getInboxServiceConfig().getStore().getWalEngineConfig(); + assertEquals(wal.getType(), "rocksdb"); + assertEquals(((Number) 
wal.get(COMPACT_MIN_TOMBSTONE_KEYS)).longValue(), 50000000L); + assertTrue((Boolean) wal.get(MANUAL_COMPACTION)); + assertEquals(((Number) wal.get(COMPACT_MIN_TOMBSTONE_RANGES)).intValue(), 2); + } + + // Verify merge also applies to retain and dist worker configs + @Test + public void testRetainAndDistEngineConfigMerge() throws Exception { + String yaml = "retainServiceConfig:\n" + + " store:\n" + + " dataEngineConfig:\n" + + " compactMinTombstoneKeys: 12345\n" + + " walEngineConfig:\n" + + " compactMinTombstoneKeys: 67890\n" + + "distServiceConfig:\n" + + " worker:\n" + + " dataEngineConfig:\n" + + " compactMinTombstoneKeys: 111\n" + + " walEngineConfig:\n" + + " compactMinTombstoneKeys: 222\n"; + + File f = File.createTempFile("bifromq-starter-config", ".yaml"); + Files.writeString(f.toPath(), yaml, StandardCharsets.UTF_8); + + StandaloneConfig cfg = ConfigFileUtil.build(f, StandaloneConfig.class); + + EngineConfig rData = cfg.getRetainServiceConfig().getStore().getDataEngineConfig(); + assertEquals(((Number) rData.get(COMPACT_MIN_TOMBSTONE_KEYS)).intValue(), 12345); + assertTrue((Boolean) rData.get(MANUAL_COMPACTION)); + assertEquals(((Number) rData.get(COMPACT_MIN_TOMBSTONE_RANGES)).intValue(), 2); + + EngineConfig rWal = cfg.getRetainServiceConfig().getStore().getWalEngineConfig(); + assertEquals(((Number) rWal.get(COMPACT_MIN_TOMBSTONE_KEYS)).intValue(), 67890); + assertTrue((Boolean) rWal.get(MANUAL_COMPACTION)); + assertEquals(((Number) rWal.get(COMPACT_MIN_TOMBSTONE_RANGES)).intValue(), 2); + + DistWorkerConfig workerCfg = cfg.getDistServiceConfig().getWorker(); + assertNotNull(workerCfg); + EngineConfig dData = workerCfg.getDataEngineConfig(); + assertEquals(((Number) dData.get(COMPACT_MIN_TOMBSTONE_KEYS)).intValue(), 111); + assertTrue((Boolean) dData.get(MANUAL_COMPACTION)); + assertEquals(((Number) dData.get(COMPACT_MIN_TOMBSTONE_RANGES)).intValue(), 2); + + EngineConfig dWal = workerCfg.getWalEngineConfig(); + assertEquals(((Number) 
dWal.get(COMPACT_MIN_TOMBSTONE_KEYS)).intValue(), 222); + assertTrue((Boolean) dWal.get(MANUAL_COMPACTION)); + assertEquals(((Number) dWal.get(COMPACT_MIN_TOMBSTONE_RANGES)).intValue(), 2); + } +} + diff --git a/coverage-report/pom.xml b/coverage-report/pom.xml index 6f42a1630..500e07c2f 100644 --- a/coverage-report/pom.xml +++ b/coverage-report/pom.xml @@ -50,7 +50,15 @@ org.apache.bifromq - base-kv-local-engine + base-kv-local-engine-spi + + + org.apache.bifromq + base-kv-local-engine-rocksdb + + + org.apache.bifromq + base-kv-local-engine-memory org.apache.bifromq @@ -193,4 +201,4 @@ bifromq-apiserver - \ No newline at end of file +