ajantha-bhat commented on code in PR #9455: URL: https://github.com/apache/iceberg/pull/9455#discussion_r1447345339
########## spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/source/SparkTable.java: ########## @@ -405,15 +407,25 @@ public boolean equals(Object other) { return false; } - // use only name in order to correctly invalidate Spark cache + // use name and effective snapshot id to support time travel SparkTable that = (SparkTable) other; - return icebergTable.name().equals(that.icebergTable.name()); + return icebergTable.name().equals(that.icebergTable.name()) + && Objects.equals(effectiveSnapshotId(), that.effectiveSnapshotId()); } @Override public int hashCode() { - // use only name in order to correctly invalidate Spark cache - return icebergTable.name().hashCode(); + // use name and effective snapshot id to support time travel + return Objects.hash(icebergTable.name(), effectiveSnapshotId()); + } + + public Long effectiveSnapshotId() { Review Comment: should we have a local variable `effectiveSnapshotId` and initialize the `effectiveSnapshotId` in the constructor instead of computing on every equals() and hashCode() call? ########## spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkTable.java: ########## @@ -53,4 +53,41 @@ public void testTableEquality() throws NoSuchTableException { assertThat(table1).as("References must be different").isNotSameAs(table2); assertThat(table1).as("Tables must be equivalent").isEqualTo(table2); } + + @TestTemplate + public void testEffectiveSnapshotIdEquality() throws NoSuchTableException { + CatalogManager catalogManager = spark.sessionState().catalogManager(); + TableCatalog catalog = (TableCatalog) catalogManager.catalog(catalogName); + Identifier identifier = Identifier.of(tableIdent.namespace().levels(), tableIdent.name()); + + sql("INSERT INTO TABLE %s VALUES (1, 'a')", tableName); + + SparkTable table = (SparkTable) catalog.loadTable(identifier); + final long version1Snapshot = table.effectiveSnapshotId(); + final String version1 = "VERSION_1"; + table.table().manageSnapshots().createTag(version1, version1Snapshot).commit(); + + SparkTable firstSnapshotTable = table.copyWithSnapshotId(version1Snapshot); + SparkTable firstTagTable = table.copyWithBranch(version1); + + sql("UPDATE %s SET data = 'b'", tableName); + + final String version2 = "VERSION_2"; + table.table().manageSnapshots().createTag(version2, table.effectiveSnapshotId()).commit(); + + SparkTable secondTagTable = table.copyWithBranch(version2); + + assertThat(firstSnapshotTable) + .as("References for two different SparkTables must be different") + .isNotSameAs(firstTagTable); + assertThat(firstSnapshotTable) + .as("The different snapshots with same id must be equal") + .isEqualTo(firstTagTable); + assertThat(firstTagTable) + .as("The different snapshots should not match") + .isNotEqualTo(secondTagTable); + assertThat(table) + .as("The SparkTable should points to latest snapshot") + .isEqualTo(secondTagTable); Review Comment: Can we also add a SQL query that queries both the tags and validate the results like you have mentioned in the issue? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org