mzzz-zzm commented on code in PR #1039:
URL: https://github.com/apache/iceberg-go/pull/1039#discussion_r3212353070
##########
manifest_test.go:
##########
@@ -2115,6 +2115,67 @@ func (m *ManifestTestSuite)
TestManifestRoundTripSortOrderID() {
m.Equal(expectedSortOrderID, *got)
}
+// writeManifestWithDistinctCounts builds a single-entry manifest at the given
+// version with distinct_counts populated for one column, round-trips it
through
+// WriteManifest + ReadManifest, and returns the distinct counts observed by
+// the reader.
+func (m *ManifestTestSuite) writeManifestWithDistinctCounts(version int)
map[int]int64 {
+ partitionSpec := NewPartitionSpecID(0)
+ snapshotID := int64(12345678)
+ seqNum := int64(9876)
+
+ dataFileBuilder, err := NewDataFileBuilder(
+ partitionSpec,
+ EntryContentData,
+ "s3://bucket/ns/table/data/distinct.parquet",
+ ParquetFile,
+ map[int]any{},
+ map[int]string{},
+ map[int]int{},
+ 10,
+ 10*1000,
+ )
+ m.Require().NoError(err)
+ dataFileBuilder.DistinctValueCounts(map[int]int64{1: 42})
+
+ var buf bytes.Buffer
+ file, err := WriteManifest(
+ "s3://bucket/ns/table/metadata/distinct.avro", &buf, version,
+ partitionSpec,
+ NewSchema(0,
+ NestedField{ID: 1, Name: "id", Type: Int64Type{},
Required: true},
+ ),
+ snapshotID,
+ []ManifestEntry{NewManifestEntry(
+ EntryStatusADDED,
+ &snapshotID,
+ &seqNum, &seqNum,
+ dataFileBuilder.Build(),
+ )},
+ )
+ m.Require().NoError(err)
+
+ entries, err := ReadManifest(file, &buf, false)
+ m.Require().NoError(err)
+ m.Require().Len(entries, 1)
+
+ return entries[0].DataFile().DistinctValueCounts()
+}
+
+// TestWriteManifestV3OmitsDistinctCounts verifies that v3 manifest writers do
+// not surface data_file.distinct_counts on the read side (deprecated by the v3
+// spec; parity with Java apache/iceberg#12182). Today this passes because none
+// of the per-version Avro record schemas in internal/avro_schemas.go declare
+// a distinct_counts field, so the field is dropped at encode time on every
+// version. The v3writerImpl.prepareEntry guard that clears DistinctCounts is
+// forward-compatibility insurance: if the v2 schema is later fixed to declare
+// distinct_counts (the v2 spec requires it; tracked separately), this test
+// keeps the v3 path compliant without further code changes.
Review Comment:
Fixed. The test comment is now short and only states the behavior under test
plus the Java parity reference / #1038 note. The PR-body-style explanation was
removed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]