AndrewJSchofield commented on code in PR #16969:
URL: https://github.com/apache/kafka/pull/16969#discussion_r1729216536
##########
core/src/main/java/kafka/server/share/SharePartitionManager.java:
##########
@@ -533,178 +538,52 @@ void maybeProcessFetchQueue() {
                shareFetchPartitionData.groupId,
                topicIdPartition
            );
-            SharePartition sharePartition = partitionCacheMap.computeIfAbsent(sharePartitionKey,
-                k -> {
-                    long start = time.hiResClockMs();
-                    SharePartition partition = new SharePartition(shareFetchPartitionData.groupId, topicIdPartition, maxInFlightMessages, maxDeliveryCount,
-                        recordLockDurationMs, timer, time, persister);
-                    this.shareGroupMetrics.partitionLoadTime(start);
-                    return partition;
-                });
-            int partitionMaxBytes = shareFetchPartitionData.partitionMaxBytes.getOrDefault(topicIdPartition, 0);
-            // Add the share partition to the list of partitions to be fetched only if we can
-            // acquire the fetch lock on it.
-            if (sharePartition.maybeAcquireFetchLock()) {
-                // If the share partition is already at capacity, we should not attempt to fetch.
-                if (sharePartition.canAcquireRecords()) {
-                    topicPartitionData.put(
-                        topicIdPartition,
-                        new FetchRequest.PartitionData(
-                            topicIdPartition.topicId(),
-                            sharePartition.nextFetchOffset(),
-                            0,
-                            partitionMaxBytes,
-                            Optional.empty()
-                        )
-                    );
-                } else {
-                    sharePartition.releaseFetchLock();
-                    log.info("Record lock partition limit exceeded for SharePartition with key {}, " +
-                        "cannot acquire more records", sharePartitionKey);
-                }
-            }
+            partitionCacheMap.computeIfAbsent(sharePartitionKey, k -> {
+                long start = time.hiResClockMs();
+                SharePartition partition = new SharePartition(shareFetchPartitionData.groupId, topicIdPartition, maxInFlightMessages, maxDeliveryCount,
+                    recordLockDurationMs, timer, time, persister);
+                this.shareGroupMetrics.partitionLoadTime(start);
+                return partition;
+            });
        });
-        if (topicPartitionData.isEmpty()) {
-            // No locks for share partitions could be acquired, so we complete the request and
-            // will re-fetch for the client in next poll.
-            shareFetchPartitionData.future.complete(Collections.emptyMap());
-            // Though if no partitions can be locked then there must be some other request which
-            // is in-flight and should release the lock. But it's safe to release the lock as
-            // the lock on share partition already exists which facilitates correct behaviour
-            // with multiple requests from queue being processed.
-            releaseProcessFetchQueueLock();
-            if (!fetchQueue.isEmpty())
-                maybeProcessFetchQueue();
-            return;
-        }
+        Set<Object> delayedShareFetchWatchKeys = new HashSet<>();
+        delayedShareFetchWatchKeys.add(new DelayedShareFetchKey(
+            shareFetchPartitionData.partitionMaxBytes.keySet(),
+            shareFetchPartitionData.groupId,
+            shareFetchPartitionData.memberId));
-        log.trace("Fetchable share partitions data: {} with groupId: {} fetch params: {}",
-            topicPartitionData, shareFetchPartitionData.groupId, shareFetchPartitionData.fetchParams);
-
-        replicaManager.fetchMessages(
-            shareFetchPartitionData.fetchParams,
-            CollectionConverters.asScala(
-                topicPartitionData.entrySet().stream().map(entry ->
-                    new Tuple2<>(entry.getKey(), entry.getValue())).collect(Collectors.toList())
-            ),
-            QuotaFactory.UnboundedQuota$.MODULE$,
-            responsePartitionData -> {
-                log.trace("Data successfully retrieved by replica manager: {}", responsePartitionData);
-                List<Tuple2<TopicIdPartition, FetchPartitionData>> responseData = CollectionConverters.asJava(
-                    responsePartitionData);
-                processFetchResponse(shareFetchPartitionData, responseData).whenComplete(
-                    (result, throwable) -> {
-                        if (throwable != null) {
-                            log.error("Error processing fetch response for share partitions", throwable);
-                            shareFetchPartitionData.future.completeExceptionally(throwable);
-                        } else {
-                            shareFetchPartitionData.future.complete(result);
-                        }
-                        // Releasing the lock to move ahead with the next request in queue.
-                        releaseFetchQueueAndPartitionsLock(shareFetchPartitionData.groupId, topicPartitionData.keySet());
-                    });
-                return BoxedUnit.UNIT;
-            });
+        // Add the share fetch to the delayed share fetch purgatory to process the fetch request.
+        addDelayedShareFetch(new DelayedShareFetch(shareFetchPartitionData, replicaManager, partitionCacheMap),
+            delayedShareFetchWatchKeys);
+        // Release the lock so that other threads can process the queue.
+        releaseProcessFetchQueueLock();
Review Comment:
If `maybeProcessFetchQueue` throws an exception, you might find that the catch
block releases a fetch queue lock which has, by that point, been obtained by another thread.
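To illustrate the hazard, here is a minimal standalone sketch (hypothetical names,
not the PR's actual locking code): if the lock is released part-way through the try
block and a later statement throws, an unconditional release in the catch block frees
a lock that another thread may have acquired in the meantime. Tracking whether this
thread still holds the lock confines the release to its own acquisition:

```java
import java.util.concurrent.atomic.AtomicBoolean;

public class FetchQueueLockSketch {
    private final AtomicBoolean processFetchQueueLock = new AtomicBoolean(false);

    public void maybeProcessFetchQueue() {
        // Only the thread that wins the CAS owns the lock.
        if (!processFetchQueueLock.compareAndSet(false, true)) {
            return; // another thread is already draining the queue
        }
        boolean heldByThisThread = true;
        try {
            drainQueue();
            processFetchQueueLock.set(false); // normal release
            heldByThisThread = false;
            mightThrowAfterRelease();         // a failure here must not release again
        } catch (Exception e) {
            // Guarded release: only undo this thread's own acquisition, never
            // one made by a thread that grabbed the lock after the release above.
            if (heldByThisThread) {
                processFetchQueueLock.set(false);
            }
        }
    }

    private void drainQueue() { /* placeholder for the real work */ }

    private void mightThrowAfterRelease() { /* placeholder */ }
}
```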
##########
core/src/main/java/kafka/server/share/DelayedShareFetch.java:
##########
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package kafka.server.share;
+
+import kafka.server.DelayedOperation;
+import kafka.server.QuotaFactory;
+import kafka.server.ReplicaManager;
+
+import org.apache.kafka.common.TopicIdPartition;
+import org.apache.kafka.common.message.ShareFetchResponseData;
+import org.apache.kafka.common.protocol.Errors;
+import org.apache.kafka.common.record.FileRecords;
+import org.apache.kafka.common.requests.FetchRequest;
+import org.apache.kafka.common.requests.ListOffsetsRequest;
+import org.apache.kafka.storage.internals.log.FetchPartitionData;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.stream.Collectors;
+
+import scala.Option;
+import scala.Tuple2;
+import scala.jdk.javaapi.CollectionConverters;
+import scala.runtime.BoxedUnit;
+
+/**
+ * A delayed share fetch operation has been introduced in case there is no share partition for which we can acquire records. We will try to wait
+ * for MaxWaitMs for records to be released else complete the share fetch request.
+ */
+public class DelayedShareFetch extends DelayedOperation {
+    private final SharePartitionManager.ShareFetchPartitionData shareFetchPartitionData;
+    private final ReplicaManager replicaManager;
+    private final Map<SharePartitionManager.SharePartitionKey, SharePartition> partitionCacheMap;
+
+    private static final Logger log = LoggerFactory.getLogger(DelayedShareFetch.class);
+
+    DelayedShareFetch(
+            SharePartitionManager.ShareFetchPartitionData shareFetchPartitionData,
+            ReplicaManager replicaManager,
+            Map<SharePartitionManager.SharePartitionKey, SharePartition> partitionCacheMap) {
+        super(shareFetchPartitionData.fetchParams().maxWaitMs, Option.empty());
+        this.shareFetchPartitionData = shareFetchPartitionData;
+        this.replicaManager = replicaManager;
+        this.partitionCacheMap = partitionCacheMap;
+    }
+
+    @Override
+    public void onExpiration() {
+    }
+
+    /**
+     * Complete the share fetch operation by fetching records for all partitions in the share fetch request irrespective
+     * of whether they have any acquired records. This is called when the fetch operation is forced to complete either
+     * because records can be acquired for some partitions or due to MaxWaitMs timeout.
+     */
+    @Override
+    public void onComplete() {
+        log.trace("onCompletion of delayed share fetch request for group {}, member {}, " +
+                "topic partitions {}", shareFetchPartitionData.groupId(),
+            shareFetchPartitionData.memberId(), shareFetchPartitionData.partitionMaxBytes().keySet());
+
+        Map<TopicIdPartition, FetchRequest.PartitionData> topicPartitionData = topicPartitionDataForAcquirablePartitions();
+        try {
+            if (topicPartitionData.isEmpty()) {
+                // No locks for share partitions could be acquired, so we complete the request with an empty response.
+                shareFetchPartitionData.future().complete(Collections.emptyMap());
+                return;
+            }
+            log.trace("Fetchable share partitions data: {} with groupId: {} fetch params: {}",
+                topicPartitionData, shareFetchPartitionData.groupId(), shareFetchPartitionData.fetchParams());
+
+            replicaManager.fetchMessages(
+                shareFetchPartitionData.fetchParams(),
+                CollectionConverters.asScala(
+                    topicPartitionData.entrySet().stream().map(entry ->
+                        new Tuple2<>(entry.getKey(), entry.getValue())).collect(Collectors.toList())
+                ),
+                QuotaFactory.UnboundedQuota$.MODULE$,
+                responsePartitionData -> {
+                    log.trace("Data successfully retrieved by replica manager: {}", responsePartitionData);
+                    List<Tuple2<TopicIdPartition, FetchPartitionData>> responseData = CollectionConverters.asJava(
+                        responsePartitionData);
+                    processFetchResponse(shareFetchPartitionData, responseData).whenComplete(
+                        (result, throwable) -> {
+                            if (throwable != null) {
+                                log.error("Error processing fetch response for share partitions", throwable);
+                                shareFetchPartitionData.future().completeExceptionally(throwable);
+                            } else {
+                                shareFetchPartitionData.future().complete(result);
+                            }
+                            // Releasing the lock to move ahead with the next request in queue.
+                            releasePartitionsLock(shareFetchPartitionData.groupId(), topicPartitionData.keySet());
+                        });
+                    return BoxedUnit.UNIT;
+                });
+        } catch (Exception e) {
+            // Release the locks acquired for the partitions in the share fetch request in case there is an exception
+            log.error("Error processing delayed share fetch request", e);
+            shareFetchPartitionData.future().completeExceptionally(e);
+            releasePartitionsLock(shareFetchPartitionData.groupId(), topicPartitionData.keySet());
+        }
+    }
+
+    /**
+     * Try to complete the fetch operation if we can acquire records for any partition in the share fetch request.
+     */
+    @Override
+    public boolean tryComplete() {
+        log.trace("onTry of delayed share fetch request for group {}, member {}, topic partitions {}",
Review Comment:
`onTry` seems like a typo.
##########
core/src/test/java/kafka/server/share/DelayedShareFetchKeyTest.java:
##########
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package kafka.server.share;
+
+import org.apache.kafka.common.TopicIdPartition;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.Uuid;
+
+import org.junit.jupiter.api.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+
+public class DelayedShareFetchKeyTest {
+
+ @Test
+ public void testDelayedShareFetchEqualsAndHashcode() {
+        TopicIdPartition tp0 = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("topic", 0));
Review Comment:
You really ought to use the same UUID for the partitions of the topic
"topic".
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]