dajac commented on code in PR #15721:
URL: https://github.com/apache/kafka/pull/15721#discussion_r1571079283
##########
group-coordinator/src/main/java/org/apache/kafka/coordinator/group/GroupMetadataManager.java:
##########
@@ -777,11 +778,78 @@ public ClassicGroup classicGroup(
}
}
+ /**
+ * Validates the online downgrade if a consumer member is fenced from the
consumer group.
+ *
+ * @param consumerGroup The ConsumerGroup.
+ * @param memberId The fenced member id.
+ * @return A boolean indicating whether it's valid to online downgrade the
consumer group.
+ */
+ private boolean validateOnlineDowngrade(ConsumerGroup consumerGroup,
String memberId) {
+ if (!consumerGroupMigrationPolicy.isDowngradeEnabled()) {
+ log.info("Cannot downgrade consumer group {} to classic group
because the online downgrade is disabled.",
+ consumerGroup.groupId());
+ return false;
+ } else if
(!consumerGroup.allMembersUseClassicProtocolExcept(memberId)) {
+ log.debug("Cannot downgrade consumer group {} to classic group
because not all its members use the classic protocol.",
+ consumerGroup.groupId());
+ return false;
+ } else if (consumerGroup.numMembers() <= 1) {
+ log.info("Skip downgrading the consumer group {} to classic group
because it's empty.",
+ consumerGroup.groupId());
+ return false;
+ } else if (consumerGroup.numMembers() - 1 > classicGroupMaxSize) {
+ log.info("Cannot downgrade consumer group {} to classic group
because its group size is greater than classic group max size.",
+ consumerGroup.groupId());
+ }
+ return true;
+ }
+
+ public CompletableFuture<Void> convertToClassicGroup(ConsumerGroup
consumerGroup, String leavingMemberId, List<Record> records) {
+ consumerGroup.createGroupTombstoneRecords(records);
+
+ ClassicGroup classicGroup;
+ try {
+ classicGroup = ClassicGroup.fromConsumerGroup(
+ consumerGroup,
+ leavingMemberId,
+ logContext,
+ time,
+ metrics,
+ consumerGroupSessionTimeoutMs,
+ metadataImage
+ );
+ } catch (SchemaException e) {
+ log.warn("Cannot downgrade the consumer group " +
consumerGroup.groupId() + ": fail to parse " +
+ "the Consumer Protocol " + ConsumerProtocol.PROTOCOL_TYPE +
".", e);
+
+ throw new GroupIdNotFoundException(String.format("Cannot downgrade
the classic group %s: %s.",
+ consumerGroup.groupId(), e.getMessage()));
+ }
+
classicGroup.createConsumerGroupRecords(metadataImage.features().metadataVersion(),
records);
+
+ removeGroup(consumerGroup.groupId());
+
+ groups.put(consumerGroup.groupId(), classicGroup);
+ metrics.onClassicGroupStateTransition(null,
classicGroup.currentState());
+
+ classicGroup.allMembers().forEach(member ->
rescheduleClassicGroupMemberHeartbeat(classicGroup, member));
+ prepareRebalance(classicGroup, String.format("Downgrade group %s.",
classicGroup.groupId()));
+
+ CompletableFuture<Void> appendFuture = new CompletableFuture<>();
+ appendFuture.whenComplete((__, t) -> {
Review Comment:
I wonder if we could use `exceptionally` here.
##########
group-coordinator/src/main/java/org/apache/kafka/coordinator/group/GroupMetadataManager.java:
##########
@@ -777,11 +778,78 @@ public ClassicGroup classicGroup(
}
}
+ /**
+ * Validates the online downgrade if a consumer member is fenced from the
consumer group.
+ *
+ * @param consumerGroup The ConsumerGroup.
+ * @param memberId The fenced member id.
+ * @return A boolean indicating whether it's valid to online downgrade the
consumer group.
+ */
+ private boolean validateOnlineDowngrade(ConsumerGroup consumerGroup,
String memberId) {
+ if (!consumerGroupMigrationPolicy.isDowngradeEnabled()) {
+ log.info("Cannot downgrade consumer group {} to classic group
because the online downgrade is disabled.",
+ consumerGroup.groupId());
+ return false;
+ } else if
(!consumerGroup.allMembersUseClassicProtocolExcept(memberId)) {
+ log.debug("Cannot downgrade consumer group {} to classic group
because not all its members use the classic protocol.",
+ consumerGroup.groupId());
+ return false;
+ } else if (consumerGroup.numMembers() <= 1) {
+ log.info("Skip downgrading the consumer group {} to classic group
because it's empty.",
+ consumerGroup.groupId());
+ return false;
+ } else if (consumerGroup.numMembers() - 1 > classicGroupMaxSize) {
+ log.info("Cannot downgrade consumer group {} to classic group
because its group size is greater than classic group max size.",
+ consumerGroup.groupId());
+ }
+ return true;
+ }
+
+ public CompletableFuture<Void> convertToClassicGroup(ConsumerGroup
consumerGroup, String leavingMemberId, List<Record> records) {
+ consumerGroup.createGroupTombstoneRecords(records);
+
+ ClassicGroup classicGroup;
+ try {
+ classicGroup = ClassicGroup.fromConsumerGroup(
+ consumerGroup,
+ leavingMemberId,
+ logContext,
+ time,
+ metrics,
+ consumerGroupSessionTimeoutMs,
+ metadataImage
+ );
+ } catch (SchemaException e) {
+ log.warn("Cannot downgrade the consumer group " +
consumerGroup.groupId() + ": fail to parse " +
+ "the Consumer Protocol " + ConsumerProtocol.PROTOCOL_TYPE +
".", e);
+
+ throw new GroupIdNotFoundException(String.format("Cannot downgrade
the classic group %s: %s.",
+ consumerGroup.groupId(), e.getMessage()));
+ }
+
classicGroup.createConsumerGroupRecords(metadataImage.features().metadataVersion(),
records);
+
+ removeGroup(consumerGroup.groupId());
Review Comment:
Let's put a comment about this one. We should explain that this record is not
replayed, so we have to remove the group from the map ourselves.
##########
group-coordinator/src/main/java/org/apache/kafka/coordinator/group/GroupMetadataManager.java:
##########
@@ -777,11 +778,78 @@ public ClassicGroup classicGroup(
}
}
+ /**
+ * Validates the online downgrade if a consumer member is fenced from the
consumer group.
+ *
+ * @param consumerGroup The ConsumerGroup.
+ * @param memberId The fenced member id.
+ * @return A boolean indicating whether it's valid to online downgrade the
consumer group.
+ */
+ private boolean validateOnlineDowngrade(ConsumerGroup consumerGroup,
String memberId) {
+ if (!consumerGroupMigrationPolicy.isDowngradeEnabled()) {
+ log.info("Cannot downgrade consumer group {} to classic group
because the online downgrade is disabled.",
+ consumerGroup.groupId());
+ return false;
+ } else if
(!consumerGroup.allMembersUseClassicProtocolExcept(memberId)) {
+ log.debug("Cannot downgrade consumer group {} to classic group
because not all its members use the classic protocol.",
+ consumerGroup.groupId());
+ return false;
+ } else if (consumerGroup.numMembers() <= 1) {
+ log.info("Skip downgrading the consumer group {} to classic group
because it's empty.",
+ consumerGroup.groupId());
+ return false;
+ } else if (consumerGroup.numMembers() - 1 > classicGroupMaxSize) {
+ log.info("Cannot downgrade consumer group {} to classic group
because its group size is greater than classic group max size.",
+ consumerGroup.groupId());
+ }
+ return true;
+ }
+
+ public CompletableFuture<Void> convertToClassicGroup(ConsumerGroup
consumerGroup, String leavingMemberId, List<Record> records) {
Review Comment:
Could we make it private or package private?
##########
group-coordinator/src/test/java/org/apache/kafka/coordinator/group/GroupMetadataManagerTest.java:
##########
@@ -10331,6 +10333,312 @@ public void
testClassicGroupOnUnloadedCompletingRebalance() throws Exception {
.setErrorCode(NOT_COORDINATOR.code()),
pendingMemberSyncResult.syncFuture.get());
}
+ @Test
+ public void testLastClassicProtocolMemberLeavingConsumerGroup() {
+ String groupId = "group-id";
+ String memberId1 = Uuid.randomUuid().toString();
+ String memberId2 = Uuid.randomUuid().toString();
+
+ Uuid fooTopicId = Uuid.randomUuid();
+ String fooTopicName = "foo";
+ Uuid barTopicId = Uuid.randomUuid();
+ String barTopicName = "bar";
+ Uuid zarTopicId = Uuid.randomUuid();
+ String zarTopicName = "zar";
+
+ MockPartitionAssignor assignor = new MockPartitionAssignor("range");
+
+ List<ConsumerGroupMemberMetadataValue.ClassicProtocol> protocols =
Collections.singletonList(
+ new ConsumerGroupMemberMetadataValue.ClassicProtocol()
+ .setName("range")
+
.setMetadata(Utils.toArray(ConsumerProtocol.serializeSubscription(new
ConsumerPartitionAssignor.Subscription(
+ Arrays.asList(fooTopicName, barTopicName),
+ null,
+ Arrays.asList(
+ new TopicPartition(fooTopicName, 0),
+ new TopicPartition(fooTopicName, 1),
+ new TopicPartition(fooTopicName, 2),
+ new TopicPartition(barTopicName, 0),
+ new TopicPartition(barTopicName, 1)
+ )
+ ))))
+ );
+
+ ConsumerGroupMember member1 = new
ConsumerGroupMember.Builder(memberId1)
+ .setState(MemberState.STABLE)
+ .setMemberEpoch(10)
+ .setPreviousMemberEpoch(9)
+ .setClientId("client")
+ .setClientHost("localhost/127.0.0.1")
+ .setSubscribedTopicNames(Arrays.asList("foo", "bar"))
+ .setServerAssignorName("range")
+ .setRebalanceTimeoutMs(45000)
+ .setClassicMemberMetadata(new
ConsumerGroupMemberMetadataValue.ClassicMemberMetadata()
+ .setSupportedProtocols(protocols))
+ .setAssignedPartitions(mkAssignment(
+ mkTopicAssignment(fooTopicId, 0, 1, 2),
+ mkTopicAssignment(barTopicId, 0, 1)))
+ .build();
+ ConsumerGroupMember member2 = new
ConsumerGroupMember.Builder(memberId2)
+ .setState(MemberState.STABLE)
+ .setMemberEpoch(10)
+ .setPreviousMemberEpoch(9)
+ .setClientId("client")
+ .setClientHost("localhost/127.0.0.1")
+ // Use zar only here to ensure that metadata needs to be
recomputed.
+ .setSubscribedTopicNames(Arrays.asList("foo", "bar", "zar"))
+ .setServerAssignorName("range")
+ .setRebalanceTimeoutMs(45000)
+ .setAssignedPartitions(mkAssignment(
+ mkTopicAssignment(fooTopicId, 3, 4, 5),
+ mkTopicAssignment(barTopicId, 2)))
+ .build();
+
+ // Consumer group with two members.
+ // Member 1 uses the classic protocol and member 2 uses the consumer
protocol.
+ GroupMetadataManagerTestContext context = new
GroupMetadataManagerTestContext.Builder()
+
.withConsumerGroupMigrationPolicy(ConsumerGroupMigrationPolicy.DOWNGRADE)
+ .withAssignors(Collections.singletonList(assignor))
+ .withMetadataImage(new MetadataImageBuilder()
+ .addTopic(fooTopicId, fooTopicName, 6)
+ .addTopic(barTopicId, barTopicName, 3)
+ .addTopic(zarTopicId, zarTopicName, 1)
+ .addRacks()
+ .build())
+ .withConsumerGroup(new ConsumerGroupBuilder(groupId, 10)
+ .withMember(member1)
+ .withMember(member2)
+ .withAssignment(memberId1, mkAssignment(
+ mkTopicAssignment(fooTopicId, 0, 1, 2),
+ mkTopicAssignment(barTopicId, 0, 1)))
+ .withAssignment(memberId2, mkAssignment(
+ mkTopicAssignment(fooTopicId, 3, 4, 5),
+ mkTopicAssignment(barTopicId, 2)))
+ .withAssignmentEpoch(10))
+ .build();
+
+ context.commit();
+ ConsumerGroup consumerGroup =
context.groupMetadataManager.consumerGroup(groupId);
+
+ // Member 2 leaves the consumer group, triggering the downgrade.
+ CoordinatorResult<ConsumerGroupHeartbeatResponseData, Record> result =
context.consumerGroupHeartbeat(
+ new ConsumerGroupHeartbeatRequestData()
+ .setGroupId(groupId)
+ .setMemberId(memberId2)
+ .setMemberEpoch(LEAVE_GROUP_MEMBER_EPOCH)
+ .setRebalanceTimeoutMs(5000)
+ .setSubscribedTopicNames(Arrays.asList("foo", "bar"))
+ .setTopicPartitions(Collections.emptyList()));
+
+
+ byte[] assignment =
Utils.toArray(ConsumerProtocol.serializeAssignment(new
ConsumerPartitionAssignor.Assignment(Arrays.asList(
+ new TopicPartition(fooTopicName, 0), new
TopicPartition(fooTopicName, 1), new TopicPartition(fooTopicName, 2),
+ new TopicPartition(barTopicName, 0), new
TopicPartition(barTopicName, 1)
+ ))));
+ Map<String, byte[]> assignments = new HashMap<String, byte[]>() {
+ {
+ put(memberId1, assignment);
+ }
+ };
+
+ ClassicGroup expectedClassicGroup = new ClassicGroup(
+ new LogContext(),
+ groupId,
+ STABLE,
+ context.time,
+ context.metrics,
+ 10,
+ Optional.ofNullable(ConsumerProtocol.PROTOCOL_TYPE),
+ Optional.ofNullable("range"),
+ Optional.ofNullable(memberId1),
+ Optional.of(context.time.milliseconds())
+ );
+ expectedClassicGroup.add(
+ new ClassicGroupMember(
+ memberId1,
+ Optional.ofNullable(member1.instanceId()),
+ member1.clientId(),
+ member1.clientHost(),
+ member1.rebalanceTimeoutMs(),
+ 45000,
+ ConsumerProtocol.PROTOCOL_TYPE,
+ member1.supportedJoinGroupRequestProtocols(),
+ assignment
+ )
+ );
+
+ List<Record> expectedRecords = Arrays.asList(
+ RecordHelpers.newCurrentAssignmentTombstoneRecord(groupId,
memberId2),
+ RecordHelpers.newTargetAssignmentTombstoneRecord(groupId,
memberId2),
+ RecordHelpers.newMemberSubscriptionTombstoneRecord(groupId,
memberId2),
+ // Subscription metadata is recomputed because zar is no longer
there.
+ RecordHelpers.newGroupSubscriptionMetadataRecord(groupId, new
HashMap<String, TopicMetadata>() {
+ {
+ put(fooTopicName, new TopicMetadata(fooTopicId,
fooTopicName, 6, mkMapOfPartitionRacks(6)));
+ put(barTopicName, new TopicMetadata(barTopicId,
barTopicName, 3, mkMapOfPartitionRacks(3)));
+ }
+ }),
+ RecordHelpers.newGroupEpochRecord(groupId, 11),
+
+ RecordHelpers.newTargetAssignmentEpochTombstoneRecord(groupId),
+ RecordHelpers.newGroupSubscriptionMetadataTombstoneRecord(groupId),
+ RecordHelpers.newGroupEpochTombstoneRecord(groupId),
+ RecordHelpers.newGroupMetadataRecord(expectedClassicGroup,
assignments, MetadataVersion.latestTesting())
+ );
+
+ assertRecordsEquals(expectedRecords, result.records());
+ verify(context.metrics,
times(1)).onConsumerGroupStateTransition(ConsumerGroup.ConsumerGroupState.STABLE,
null);
+ verify(context.metrics, times(1)).onClassicGroupStateTransition(null,
STABLE);
+
+ // The new classic member 1 has a heartbeat timeout.
+ ScheduledTimeout<Void, Record> heartbeatTimeout =
context.timer.timeout(
+ classicGroupHeartbeatKey(groupId, memberId1));
+ assertNotNull(heartbeatTimeout);
+ // The new rebalance has a groupJoin timeout.
+ ScheduledTimeout<Void, Record> groupJoinTimeout =
context.timer.timeout(
+ classicGroupJoinKey(groupId));
+ assertNotNull(groupJoinTimeout);
+
+ // A new rebalance is triggered.
+ ClassicGroup classicGroup =
context.groupMetadataManager.getOrMaybeCreateClassicGroup(groupId, false);
+ assertTrue(classicGroup.isInState(PREPARING_REBALANCE));
+
+ // Simulate a failed write to the log.
+ result.appendFuture().completeExceptionally(new
NotLeaderOrFollowerException());
+ context.rollback();
+
+ // The group is reverted back to the consumer group.
+ assertEquals(consumerGroup,
context.groupMetadataManager.consumerGroup(groupId));
+ verify(context.metrics,
times(1)).onClassicGroupStateTransition(PREPARING_REBALANCE, null);
+ }
+
+ @Test
+ public void testLastClassicProtocolMemberSessionTimeoutInConsumerGroup() {
Review Comment:
Should we also add a test case for the rebalance timeout path?
##########
group-coordinator/src/test/java/org/apache/kafka/coordinator/group/Assertions.java:
##########
@@ -198,6 +204,62 @@ private static void assertApiMessageAndVersionEquals(
}
}
}
+ } else if (actual.message() instanceof GroupMetadataValue) {
Review Comment:
I understand that you replicated the pattern already in place. However, I
don't like it even if I wrote it. Recently, I have been using a different
approach which is better, I think.
I do the following:
1) I duplicate both messages (with `duplicate()` method)
2) I normalize them in place (e.g. sort lists, etc.)
3) I use `assertEquals` to compare them.
The benefit of this approach is that it automatically includes new fields.
Would it work here too?
##########
group-coordinator/src/main/java/org/apache/kafka/coordinator/group/GroupMetadataManager.java:
##########
@@ -777,11 +778,78 @@ public ClassicGroup classicGroup(
}
}
+ /**
+ * Validates the online downgrade if a consumer member is fenced from the
consumer group.
+ *
+ * @param consumerGroup The ConsumerGroup.
+ * @param memberId The fenced member id.
+ * @return A boolean indicating whether it's valid to online downgrade the
consumer group.
+ */
+ private boolean validateOnlineDowngrade(ConsumerGroup consumerGroup,
String memberId) {
+ if (!consumerGroupMigrationPolicy.isDowngradeEnabled()) {
+ log.info("Cannot downgrade consumer group {} to classic group
because the online downgrade is disabled.",
+ consumerGroup.groupId());
+ return false;
+ } else if
(!consumerGroup.allMembersUseClassicProtocolExcept(memberId)) {
Review Comment:
Could we use `numMembers() - numClassicProtocolMembers() <= 1`? I think
we know that the remaining member is the one using the consumer group
protocol.
##########
group-coordinator/src/main/java/org/apache/kafka/coordinator/group/GroupMetadataManager.java:
##########
@@ -1440,9 +1508,20 @@ private
CoordinatorResult<ConsumerGroupHeartbeatResponseData, Record> consumerGr
records = consumerGroupFenceMember(group, member);
}
}
- return new CoordinatorResult<>(records, new
ConsumerGroupHeartbeatResponseData()
- .setMemberId(memberId)
- .setMemberEpoch(memberEpoch));
+
+ CompletableFuture<Void> appendFuture = null;
+ if ((instanceId == null || memberEpoch !=
LEAVE_GROUP_STATIC_MEMBER_EPOCH) &&
Review Comment:
Instead of repeating the conditions, could we use a boolean that we set in
the relevant places?
##########
group-coordinator/src/main/java/org/apache/kafka/coordinator/group/GroupMetadataManager.java:
##########
@@ -777,11 +778,78 @@ public ClassicGroup classicGroup(
}
}
+ /**
+ * Validates the online downgrade if a consumer member is fenced from the
consumer group.
+ *
+ * @param consumerGroup The ConsumerGroup.
+ * @param memberId The fenced member id.
+ * @return A boolean indicating whether it's valid to online downgrade the
consumer group.
+ */
+ private boolean validateOnlineDowngrade(ConsumerGroup consumerGroup,
String memberId) {
+ if (!consumerGroupMigrationPolicy.isDowngradeEnabled()) {
+ log.info("Cannot downgrade consumer group {} to classic group
because the online downgrade is disabled.",
+ consumerGroup.groupId());
+ return false;
+ } else if
(!consumerGroup.allMembersUseClassicProtocolExcept(memberId)) {
+ log.debug("Cannot downgrade consumer group {} to classic group
because not all its members use the classic protocol.",
+ consumerGroup.groupId());
+ return false;
+ } else if (consumerGroup.numMembers() <= 1) {
+ log.info("Skip downgrading the consumer group {} to classic group
because it's empty.",
+ consumerGroup.groupId());
+ return false;
+ } else if (consumerGroup.numMembers() - 1 > classicGroupMaxSize) {
+ log.info("Cannot downgrade consumer group {} to classic group
because its group size is greater than classic group max size.",
+ consumerGroup.groupId());
+ }
+ return true;
+ }
+
+ public CompletableFuture<Void> convertToClassicGroup(ConsumerGroup
consumerGroup, String leavingMemberId, List<Record> records) {
+ consumerGroup.createGroupTombstoneRecords(records);
+
+ ClassicGroup classicGroup;
+ try {
+ classicGroup = ClassicGroup.fromConsumerGroup(
+ consumerGroup,
+ leavingMemberId,
+ logContext,
+ time,
+ metrics,
+ consumerGroupSessionTimeoutMs,
+ metadataImage
+ );
+ } catch (SchemaException e) {
+ log.warn("Cannot downgrade the consumer group " +
consumerGroup.groupId() + ": fail to parse " +
+ "the Consumer Protocol " + ConsumerProtocol.PROTOCOL_TYPE +
".", e);
+
+ throw new GroupIdNotFoundException(String.format("Cannot downgrade
the classic group %s: %s.",
+ consumerGroup.groupId(), e.getMessage()));
+ }
+
classicGroup.createConsumerGroupRecords(metadataImage.features().metadataVersion(),
records);
Review Comment:
nit: `createClassicGroupRecords`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]