kevinjqliu commented on code in PR #329:
URL: https://github.com/apache/iceberg-go/pull/329#discussion_r1989722553


##########
table/arrow_utils.go:
##########
@@ -892,3 +899,356 @@ func ToRequestedSchema(ctx context.Context, requested, 
fileSchema *iceberg.Schem
 
        return out, nil
 }
+
+func must[T any](v T, err error) T {
+       if err != nil {
+               panic(err)
+       }
+
+       return v
+}
+
+type metricModeType string
+
+const (
+       metricModeTruncate metricModeType = "truncate"
+       metricModeNone     metricModeType = "none"
+       metricModeCounts   metricModeType = "counts"
+       metricModeFull     metricModeType = "full"
+)
+
+type metricsMode struct {
+       typ metricModeType
+       len int
+}
+
+var truncationExpr = regexp.MustCompile(`^truncate\((\d+)\)$`)
+
+func matchMetricsMode(mode string) (metricsMode, error) {
+       sanitized := strings.ToLower(strings.TrimSpace(mode))
+       if strings.HasPrefix(sanitized, string(metricModeTruncate)) {
+               m := truncationExpr.FindStringSubmatch(sanitized)
+               if len(m) < 2 {
+                       return metricsMode{}, fmt.Errorf("malformed truncate 
metrics mode: %s", mode)
+               }
+
+               truncLen, err := strconv.Atoi(m[1])
+               if err != nil {
+                       return metricsMode{}, fmt.Errorf("malformed truncate 
metrics mode: %s", mode)
+               }
+
+               if truncLen <= 0 {
+                       return metricsMode{}, fmt.Errorf("invalid truncate 
length: %d", truncLen)
+               }
+
+               return metricsMode{typ: metricModeTruncate, len: truncLen}, nil
+       }
+
+       switch sanitized {
+       case string(metricModeNone):
+               return metricsMode{typ: metricModeNone}, nil
+       case string(metricModeCounts):
+               return metricsMode{typ: metricModeCounts}, nil
+       case string(metricModeFull):
+               return metricsMode{typ: metricModeFull}, nil
+       default:
+               return metricsMode{}, fmt.Errorf("unsupported metrics mode: 
%s", mode)
+       }
+}
+
+type statisticsCollector struct {
+       fieldID    int
+       icebergTyp iceberg.PrimitiveType
+       mode       metricsMode
+       colName    string
+}
+
+type arrowStatsCollector struct {
+       fieldID     int
+       schema      *iceberg.Schema
+       props       iceberg.Properties
+       defaultMode string
+}
+
+func (a *arrowStatsCollector) Schema(_ *iceberg.Schema, results func() 
[]statisticsCollector) []statisticsCollector {
+       return results()
+}
+
+func (a *arrowStatsCollector) Struct(_ iceberg.StructType, results []func() 
[]statisticsCollector) []statisticsCollector {
+       result := make([]statisticsCollector, 0, len(results))
+       for _, res := range results {
+               result = append(result, res()...)
+       }
+
+       return result
+}
+
+func (a *arrowStatsCollector) Field(field iceberg.NestedField, fieldRes func() 
[]statisticsCollector) []statisticsCollector {
+       a.fieldID = field.ID
+
+       return fieldRes()
+}
+
+func (a *arrowStatsCollector) List(list iceberg.ListType, elemResult func() 
[]statisticsCollector) []statisticsCollector {
+       a.fieldID = list.ElementID
+
+       return elemResult()
+}
+
+func (a *arrowStatsCollector) Map(m iceberg.MapType, keyResult func() 
[]statisticsCollector, valResult func() []statisticsCollector) 
[]statisticsCollector {
+       a.fieldID = m.KeyID
+       keyRes := keyResult()
+
+       a.fieldID = m.ValueID
+       valRes := valResult()
+
+       return append(keyRes, valRes...)
+}
+
+func (a *arrowStatsCollector) Primitive(dt iceberg.PrimitiveType) 
[]statisticsCollector {
+       colName, ok := a.schema.FindColumnName(a.fieldID)
+       if !ok {
+               return []statisticsCollector{}
+       }
+
+       metMode, err := matchMetricsMode(a.defaultMode)
+       if err != nil {
+               panic(err)
+       }
+
+       colMode, ok := a.props[MetricsModeColumnConfPrefix+"."+colName]
+       if ok {
+               metMode, err = matchMetricsMode(colMode)
+               if err != nil {
+                       panic(err)
+               }
+       }
+
+       switch dt.(type) {
+       case iceberg.StringType:
+       case iceberg.BinaryType:
+       default:
+               if metMode.typ == metricModeTruncate {
+                       metMode = metricsMode{typ: metricModeFull, len: 0}
+               }
+       }
+
+       isNested := strings.Contains(colName, ".")

Review Comment:
   This might be an issue for columns that have a `.` in their name: a top-level column named like that would be treated as nested by this check.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to