singhpk234 commented on code in PR #14510:
URL: https://github.com/apache/iceberg/pull/14510#discussion_r2501915158
##########
kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/channel/Coordinator.java:
##########
@@ -302,21 +329,37 @@ private Snapshot latestSnapshot(Table table, String
branch) {
private Map<Integer, Long> lastCommittedOffsetsForTable(Table table, String
branch) {
Snapshot snapshot = latestSnapshot(table, branch);
- while (snapshot != null) {
- Map<String, String> summary = snapshot.summary();
- String value = summary.get(snapshotOffsetsProp);
- if (value != null) {
- TypeReference<Map<Integer, Long>> typeRef = new
TypeReference<Map<Integer, Long>>() {};
- try {
- return MAPPER.readValue(value, typeRef);
- } catch (IOException e) {
- throw new UncheckedIOException(e);
- }
- }
- Long parentSnapshotId = snapshot.parentId();
- snapshot = parentSnapshotId != null ? table.snapshot(parentSnapshotId) :
null;
+
+ if (snapshot == null) {
+ return Map.of();
+ }
+
+ Iterable<Snapshot> branchAncestry =
+ SnapshotUtil.ancestorsOf(snapshot.snapshotId(), table::snapshot);
+ return lastCommittedOffsets(branchAncestry);
+ }
+
+ private Map<Integer, Long> lastCommittedOffsets(Iterable<Snapshot>
snapshots) {
+ return Streams.stream(snapshots)
+ .filter(Objects::nonNull)
+ .filter(snapshot ->
snapshot.summary().containsKey(snapshotOffsetsProp))
+ .map(snapshot -> snapshot.summary().get(snapshotOffsetsProp))
+ .map(this::parseOffsets)
+ .findFirst()
+ .orElseGet(Map::of);
+ }
+
+ private Map<Integer, Long> parseOffsets(String value) {
+ if (value == null) {
+ return Map.of();
+ }
Review Comment:
If there is a key `snapshotOffsetsProp` but its value is empty or null, is that
an illegal state?
##########
kafka-connect/kafka-connect/src/main/java/org/apache/iceberg/connect/channel/Coordinator.java:
##########
@@ -288,6 +289,32 @@ private void commitToTable(
}
}
+ private SnapshotAncestryValidator offsetValidator(
+ TableIdentifier tableIdentifier, Map<Integer, Long> expectedOffsets) {
+
+ return new SnapshotAncestryValidator() {
+ private Map<Integer, Long> lastCommittedOffsets;
+
+ @Override
+ public Boolean apply(Iterable<Snapshot> baseSnapshots) {
+ lastCommittedOffsets = lastCommittedOffsets(baseSnapshots);
+
+ if (expectedOffsets.isEmpty() && lastCommittedOffsets.isEmpty()) {
+ return true; // there are no stored offsets, so assume we're
starting with new offsets
+ }
Review Comment:
Can we remove this? The equals check below is going to take care of this case.
##########
kafka-connect/kafka-connect/src/test/java/org/apache/iceberg/connect/channel/TestCoordinator.java:
##########
@@ -229,4 +230,45 @@ public void testCoordinatorRunning() {
sourceConsumer.rebalance(ImmutableList.of(tp1));
assertThat(mockIcebergSinkTask.isCoordinatorRunning()).isFalse();
}
+
+ @Test
+ public void testCoordinatorCommittedOffsetValidation() {
+ // This test demonstrates that the Coordinator's validateAndCommit method
+ // prevents commits when another independent commit has updated the offsets
+ // during the commit process
+
+ // Set the initial offsets
+ table
+ .newAppend()
+ .appendFile(EventTestUtil.createDataFile())
+ .set(OFFSETS_SNAPSHOT_PROP, "{\"0\":1}")
+ .commit();
+
+ Table frozenTable = catalog.loadTable(TABLE_IDENTIFIER);
+
+ // return the original table state on the first load, so that the update
will happen
+ // during the commit refresh
+
when(catalog.loadTable(TABLE_IDENTIFIER)).thenReturn(frozenTable).thenCallRealMethod();
+
+ // Independently update the offsets
+ table
+ .newAppend()
+ .appendFile(EventTestUtil.createDataFile())
+ .set(OFFSETS_SNAPSHOT_PROP, "{\"0\":7}")
+ .commit();
+
+ table.refresh();
+ assertThat(table.snapshots()).hasSize(2);
+ Snapshot firstSnapshot = table.currentSnapshot();
+ assertThat(firstSnapshot.summary()).containsEntry(OFFSETS_SNAPSHOT_PROP,
"{\"0\":7}");
+
+ // Trigger commit to the table
+ coordinatorTest(
+ ImmutableList.of(EventTestUtil.createDataFile()), ImmutableList.of(),
EventTestUtil.now());
+
+ // Assert that the table was not updated and offsets remain
+ table.refresh();
+ assertThat(table.snapshots()).hasSize(2);
Review Comment:
Minor: could we just assert the 2 snapshot IDs instead?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]