zeroshade commented on code in PR #524:
URL: https://github.com/apache/iceberg-go/pull/524#discussion_r2314393801
##########
manifest.go:
##########
@@ -1461,33 +1598,114 @@ func mapToAvroColMap[K comparable, V any](m map[K]V)
*[]colMap[K, V] {
return &out
}
-func avroPartitionData(input map[int]any, logicalTypes
map[int]avro.LogicalType) map[int]any {
+func avroPartitionData(input map[int]any, logicalTypes
map[int]avro.LogicalType, fixedSizes map[int]int) map[int]any {
out := make(map[int]any)
for k, v := range input {
if logical, ok := logicalTypes[k]; ok {
- switch logical {
- case avro.Date:
- out[k] =
Date(v.(time.Time).Truncate(24*time.Hour).Unix() / int64((time.Hour *
24).Seconds()))
- case avro.TimeMillis:
- out[k] = Time(v.(time.Duration).Milliseconds())
- case avro.TimeMicros:
- out[k] = Time(v.(time.Duration).Microseconds())
- case avro.TimestampMillis:
- out[k] =
Timestamp(v.(time.Time).UTC().UnixMilli())
- case avro.TimestampMicros:
- out[k] =
Timestamp(v.(time.Time).UTC().UnixMicro())
- default:
- out[k] = v
- }
-
- continue
+ out[k] = convertLogicalTypeValue(v, logical,
fixedSizes[k])
+ } else {
+ out[k] = convertDefaultValue(v, fixedSizes[k])
}
- out[k] = v
}
return out
}
+func convertLogicalTypeValue(v any, logicalType avro.LogicalType, fixedSize
int) any {
+ switch logicalType {
+ case avro.Date:
+ return convertDateValue(v)
+ case avro.TimeMicros:
+ return convertTimeMicrosValue(v)
+ case avro.TimestampMicros:
+ return convertTimestampMicrosValue(v)
+ case avro.Decimal:
+ return convertDecimalValue(v, fixedSize)
+ default:
+ return v
+ }
+}
+
+func convertDateValue(v any) any {
+ if t, ok := v.(time.Time); ok {
+ return map[string]any{"int.date":
int32(t.Truncate(24*time.Hour).Unix() / int64((time.Hour * 24).Seconds()))}
+ }
+ if d, ok := v.(Date); ok {
+ return map[string]any{"int.date": int32(d)}
+ }
+
+ return v
+}
+
+func convertTimeMicrosValue(v any) any {
+ if t, ok := v.(Time); ok {
+ return map[string]any{"long.time-micros": int64(t)}
+ }
+ if d, ok := v.(time.Duration); ok {
+ return map[string]any{"long.time-micros": d.Microseconds()}
+ }
+
+ return v
+}
+
+func convertTimestampMicrosValue(v any) any {
+ if t, ok := v.(time.Time); ok {
+ return map[string]any{"long.timestamp-micros":
t.UTC().UnixMicro()}
+ }
+ if ts, ok := v.(Timestamp); ok {
+ return map[string]any{"long.timestamp-micros": int64(ts)}
+ }
+
+ return v
+}
+
+func convertDecimalValue(v any, fixedSize int) any {
+ if v == nil {
+ return map[string]any{"null": nil}
+ }
+
+ dec, ok := v.(Decimal)
+ if !ok {
+ return v
+ }
+
+ bytes, err := DecimalLiteral(dec).MarshalBinary()
+ if err != nil {
+ return v
+ }
+ fixedArray := convertToFixedArray(padOrTruncateBytes(bytes, fixedSize),
fixedSize)
Review Comment:
Why convert to a fixed array instead of just returning the decimal type?
##########
manifest.go:
##########
@@ -1461,33 +1598,114 @@ func mapToAvroColMap[K comparable, V any](m map[K]V)
*[]colMap[K, V] {
return &out
}
-func avroPartitionData(input map[int]any, logicalTypes
map[int]avro.LogicalType) map[int]any {
+func avroPartitionData(input map[int]any, logicalTypes
map[int]avro.LogicalType, fixedSizes map[int]int) map[int]any {
out := make(map[int]any)
for k, v := range input {
if logical, ok := logicalTypes[k]; ok {
- switch logical {
- case avro.Date:
- out[k] =
Date(v.(time.Time).Truncate(24*time.Hour).Unix() / int64((time.Hour *
24).Seconds()))
- case avro.TimeMillis:
- out[k] = Time(v.(time.Duration).Milliseconds())
- case avro.TimeMicros:
- out[k] = Time(v.(time.Duration).Microseconds())
- case avro.TimestampMillis:
- out[k] =
Timestamp(v.(time.Time).UTC().UnixMilli())
- case avro.TimestampMicros:
- out[k] =
Timestamp(v.(time.Time).UTC().UnixMicro())
- default:
- out[k] = v
- }
-
- continue
+ out[k] = convertLogicalTypeValue(v, logical,
fixedSizes[k])
+ } else {
+ out[k] = convertDefaultValue(v, fixedSizes[k])
}
- out[k] = v
}
return out
}
+func convertLogicalTypeValue(v any, logicalType avro.LogicalType, fixedSize
int) any {
+ switch logicalType {
+ case avro.Date:
+ return convertDateValue(v)
+ case avro.TimeMicros:
+ return convertTimeMicrosValue(v)
+ case avro.TimestampMicros:
+ return convertTimestampMicrosValue(v)
+ case avro.Decimal:
+ return convertDecimalValue(v, fixedSize)
+ default:
+ return v
+ }
+}
+
+func convertDateValue(v any) any {
+ if t, ok := v.(time.Time); ok {
+ return map[string]any{"int.date":
int32(t.Truncate(24*time.Hour).Unix() / int64((time.Hour * 24).Seconds()))}
+ }
+ if d, ok := v.(Date); ok {
+ return map[string]any{"int.date": int32(d)}
+ }
+
+ return v
+}
+
+func convertTimeMicrosValue(v any) any {
+ if t, ok := v.(Time); ok {
+ return map[string]any{"long.time-micros": int64(t)}
+ }
+ if d, ok := v.(time.Duration); ok {
+ return map[string]any{"long.time-micros": d.Microseconds()}
+ }
+
+ return v
+}
+
+func convertTimestampMicrosValue(v any) any {
+ if t, ok := v.(time.Time); ok {
+ return map[string]any{"long.timestamp-micros":
t.UTC().UnixMicro()}
+ }
+ if ts, ok := v.(Timestamp); ok {
+ return map[string]any{"long.timestamp-micros": int64(ts)}
+ }
+
+ return v
+}
+
+func convertDecimalValue(v any, fixedSize int) any {
+ if v == nil {
+ return map[string]any{"null": nil}
+ }
+
+ dec, ok := v.(Decimal)
+ if !ok {
+ return v
+ }
+
+ bytes, err := DecimalLiteral(dec).MarshalBinary()
+ if err != nil {
+ return v
+ }
+ fixedArray := convertToFixedArray(padOrTruncateBytes(bytes, fixedSize),
fixedSize)
+
+ return map[string]any{"fixed": fixedArray}
+}
+
+func convertDefaultValue(v any, fixedSize int) any {
+ if uuidVal, ok := v.(uuid.UUID); ok {
+ return uuidVal.String()
+ }
+
+ if bytes, ok := v.([]byte); ok && fixedSize > 0 {
+ return convertToFixedArray(padOrTruncateBytes(bytes,
fixedSize), fixedSize)
Review Comment:
Why do we need to do this? Can't we just return the uuid?
##########
exprs.go:
##########
@@ -482,8 +482,33 @@ func (b *boundRef[T]) Equals(other BoundTerm) bool {
}
func (b *boundRef[T]) Ref() BoundReference { return b }
-func (b *boundRef[T]) Field() NestedField { return b.field }
-func (b *boundRef[T]) Type() Type { return b.field.Type }
+
+func unwrapLogicalTypeValue(v any) any {
+ if m, ok := v.(map[string]any); ok {
+ if val, exists := m["long.timestamp-micros"]; exists {
+ if microseconds, ok := val.(int64); ok {
+ return Timestamp(microseconds)
+ }
+ }
+
+ if val, exists := m["int.date"]; exists {
+ if days, ok := val.(int32); ok {
+ return days
+ }
+ }
+
+ if val, exists := m["long.time-micros"]; exists {
+ if microseconds, ok := val.(int64); ok {
+ return Time(microseconds)
+ }
+ }
+ }
+
+ return v
Review Comment:
This conversion should be happening before we ever get this far. What
workflow would result in us getting here using `map[string]any`?
##########
manifest.go:
##########
@@ -416,30 +429,53 @@ func getFieldIDMap(sc avro.Schema) (map[string]int,
map[int]avro.LogicalType) {
result := make(map[string]int)
logicalTypes := make(map[int]avro.LogicalType)
+ fixedSizes := make(map[int]int)
+
entryField := getField(sc.(*avro.RecordSchema), "data_file")
partitionField := getField(entryField.Type().(*avro.RecordSchema),
"partition")
for _, field := range
partitionField.Type().(*avro.RecordSchema).Fields() {
- if fid, ok := field.Prop("field-id").(float64); ok {
- result[field.Name()] = int(fid)
- avroTyp := field.Type()
- if us, ok := avroTyp.(*avro.UnionSchema); ok {
- for _, t := range us.Types() {
- avroTyp = t
- }
+ var fid int
+ switch v := field.Prop("field-id").(type) {
Review Comment:
Does the `field-id` come back as a float instead of an `int` for some reason?
##########
table/arrow_utils.go:
##########
@@ -1030,11 +1031,19 @@ func (sc *schemaCompatVisitor) isFieldCompat(lhs
iceberg.NestedField) bool {
func (sc *schemaCompatVisitor) Schema(s *iceberg.Schema, v func() bool) bool {
if !v() {
- pterm.DisableColor()
- tbl :=
pterm.DefaultTable.WithHasHeader(true).WithData(sc.errorData)
- tbl.Render()
- txt, _ := tbl.Srender()
- pterm.EnableColor()
+ var lines []string
+ lines = append(lines, " | Table Field |
Requested Field")
+
+ for i, row := range sc.errorData {
+ if i == 0 {
+ continue
+ }
+ if len(row) >= 3 {
+ lines = append(lines, fmt.Sprintf("%s | %-24s |
%s", row[0], row[1], row[2]))
+ }
+ }
Review Comment:
Why the change here?
##########
manifest.go:
##########
@@ -416,30 +429,53 @@ func getFieldIDMap(sc avro.Schema) (map[string]int,
map[int]avro.LogicalType) {
result := make(map[string]int)
logicalTypes := make(map[int]avro.LogicalType)
+ fixedSizes := make(map[int]int)
+
entryField := getField(sc.(*avro.RecordSchema), "data_file")
partitionField := getField(entryField.Type().(*avro.RecordSchema),
"partition")
for _, field := range
partitionField.Type().(*avro.RecordSchema).Fields() {
- if fid, ok := field.Prop("field-id").(float64); ok {
- result[field.Name()] = int(fid)
- avroTyp := field.Type()
- if us, ok := avroTyp.(*avro.UnionSchema); ok {
- for _, t := range us.Types() {
- avroTyp = t
- }
+ var fid int
+ switch v := field.Prop("field-id").(type) {
+ case int:
+ fid = v
+ case int32:
+ fid = int(v)
+ case int64:
+ fid = int(v)
+ case float64:
+ fid = int(v)
+ default:
+ continue
+ }
+
+ result[field.Name()] = fid
+ avroTyp := field.Type()
+ if us, ok := avroTyp.(*avro.UnionSchema); ok {
+ for _, t := range us.Types() {
+ avroTyp = t
}
Review Comment:
Should we confirm that the union member we end up with isn't the `null` type?
##########
manifest.go:
##########
@@ -960,6 +1003,37 @@ func (p *partitionFieldStats[T]) update(value any) (err
error) {
return nil
}
+func extractBytesFromFixed(fixedBytes interface{}) []byte {
+ switch fb := fixedBytes.(type) {
+ case []interface{}:
Review Comment:
In what situation does this happen, where we get `[]interface{}` instead
of `[]byte`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]