zeroshade commented on code in PR #427: URL: https://github.com/apache/iceberg-go/pull/427#discussion_r2082119647
########## manifest.go: ########## @@ -735,6 +739,7 @@ func (c *ManifestReader) ReadEntry() (ManifestEntry, error) { tmp.inherit(c.file) if fieldToIDMap, ok := tmp.DataFile().(hasFieldToIDMap); ok { fieldToIDMap.setFieldNameToIDMap(c.fieldNameToID) + fieldToIDMap.setFieldIDToNameMap(c.fieldIDToName) Review Comment: we could probably just have `setFieldNameToIDMap` automatically generate the inverse map (or vice versa) rather than having to generate both maps in `getFieldIDMap` ########## manifest.go: ########## @@ -1480,6 +1485,33 @@ func avroPartitionData(input map[string]any, nameToID map[string]int, logicalTyp return out } +func avroFieldPartitionData(input map[int]any, idToName map[int]string, logicalTypes map[int]avro.LogicalType) map[int]any { Review Comment: the `idToName` map isn't being used at all, we should probably remove that as an argument ########## manifest.go: ########## @@ -1527,11 +1561,22 @@ func (d *dataFile) initializeMapData() { d.distinctCntMap = avroColMapToMap(d.DistinctCounts) d.lowerBoundMap = avroColMapToMap(d.LowerBounds) d.upperBoundMap = avroColMapToMap(d.UpperBounds) + // dataFile read from avro + if len(d.fieldIDToPartitionData) < len(d.PartitionData) { + d.fieldIDToPartitionData = make(map[int]any, len(d.PartitionData)) + for k, v := range d.PartitionData { + if id, ok := d.fieldNameToID[k]; ok { + d.fieldIDToPartitionData[id] = v + } + } + } d.PartitionData = avroPartitionData(d.PartitionData, d.fieldNameToID, d.fieldIDToLogicalType) + d.fieldIDToPartitionData = avroFieldPartitionData(d.fieldIDToPartitionData, d.fieldIDToName, d.fieldIDToLogicalType) Review Comment: could we pass the NameToID map into `avroPartitionData` and just do the mapping there? ########## manifest.go: ########## @@ -415,31 +415,33 @@ func getFieldIDMap(sc avro.Schema) (map[string]int, map[int]avro.LogicalType) { } result := make(map[string]int) + fieldIDToName := make(map[int]string) Review Comment: do we really need both of these maps? especially given that one is just the inverse of the other. ########## manifest_test.go: ########## @@ -1074,7 +1074,7 @@ func (m *ManifestTestSuite) TestManifestEntryBuilder() { EntryContentData, "sample.parquet", ParquetFile, - map[string]any{"int": int(1), "datetime": time.Unix(1925, 0).UnixMicro()}, + nil, Review Comment: instead of changing this to nil, we should be using the correct mapping here -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org