zeroshade commented on code in PR #427:
URL: https://github.com/apache/iceberg-go/pull/427#discussion_r2082119647


##########
manifest.go:
##########
@@ -735,6 +739,7 @@ func (c *ManifestReader) ReadEntry() (ManifestEntry, error) 
{
        tmp.inherit(c.file)
        if fieldToIDMap, ok := tmp.DataFile().(hasFieldToIDMap); ok {
                fieldToIDMap.setFieldNameToIDMap(c.fieldNameToID)
+               fieldToIDMap.setFieldIDToNameMap(c.fieldIDToName)

Review Comment:
   we could probably just have `setFieldNameToIDMap` automatically generate the 
inverse map (or vice versa) rather than having to generate both maps in 
`getFieldIDMap`



##########
manifest.go:
##########
@@ -1480,6 +1485,33 @@ func avroPartitionData(input map[string]any, nameToID 
map[string]int, logicalTyp
        return out
 }
 
+func avroFieldPartitionData(input map[int]any, idToName map[int]string, 
logicalTypes map[int]avro.LogicalType) map[int]any {

Review Comment:
   the `idToName` map isn't being used at all, we should probably remove that 
as an argument



##########
manifest.go:
##########
@@ -1527,11 +1561,22 @@ func (d *dataFile) initializeMapData() {
                d.distinctCntMap = avroColMapToMap(d.DistinctCounts)
                d.lowerBoundMap = avroColMapToMap(d.LowerBounds)
                d.upperBoundMap = avroColMapToMap(d.UpperBounds)
+               // dataFile read from avro
+               if len(d.fieldIDToPartitionData) < len(d.PartitionData) {
+                       d.fieldIDToPartitionData = make(map[int]any, 
len(d.PartitionData))
+                       for k, v := range d.PartitionData {
+                               if id, ok := d.fieldNameToID[k]; ok {
+                                       d.fieldIDToPartitionData[id] = v
+                               }
+                       }
+               }
                d.PartitionData = avroPartitionData(d.PartitionData, 
d.fieldNameToID, d.fieldIDToLogicalType)
+               d.fieldIDToPartitionData = 
avroFieldPartitionData(d.fieldIDToPartitionData, d.fieldIDToName, 
d.fieldIDToLogicalType)

Review Comment:
   could we pass the NameToID map into `avroPartitionData` and just do the 
mapping there?



##########
manifest.go:
##########
@@ -415,31 +415,33 @@ func getFieldIDMap(sc avro.Schema) (map[string]int, 
map[int]avro.LogicalType) {
        }
 
        result := make(map[string]int)
+       fieldIDToName := make(map[int]string)

Review Comment:
   do we really need both of these maps? especially given that one is just the 
inverse of the other.



##########
manifest_test.go:
##########
@@ -1074,7 +1074,7 @@ func (m *ManifestTestSuite) TestManifestEntryBuilder() {
                EntryContentData,
                "sample.parquet",
                ParquetFile,
-               map[string]any{"int": int(1), "datetime": time.Unix(1925, 
0).UnixMicro()},
+               nil,

Review Comment:
   instead of changing this to nil, we should be using the correct mapping here



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to