tanmayrauth commented on code in PR #1112:
URL: https://github.com/apache/iceberg-go/pull/1112#discussion_r3283697419
##########
table/partitioned_fanout_writer.go:
##########
@@ -464,3 +472,66 @@ func getArrowValueAsIcebergLiteral(column arrow.Array, row
int) (iceberg.Literal
return nil, fmt.Errorf("unsupported value type: %T", val)
}
}
+
+func timestampLiteralFromArrow(arr *array.Timestamp, row int, sourceType
iceberg.Type) (iceberg.Literal, error) {
+ timestampType := arr.DataType().(*arrow.TimestampType)
+ value := int64(arr.Value(row))
+
+ switch sourceType.(type) {
+ case iceberg.TimestampType, iceberg.TimestampTzType:
+ micros, err := arrowTimestampToMicros(value, timestampType.Unit)
+ if err != nil {
+ return nil, err
+ }
+
+ return iceberg.NewLiteral(iceberg.Timestamp(micros)), nil
+ case iceberg.TimestampNsType, iceberg.TimestampTzNsType:
+ nanos, err := arrowTimestampToNanos(value, timestampType.Unit)
+ if err != nil {
+ return nil, err
+ }
+
+ return iceberg.NewLiteral(iceberg.TimestampNano(nanos)), nil
+ default:
+ return nil, fmt.Errorf("cannot convert arrow timestamp to
iceberg literal for source type %v", sourceType)
+ }
+}
+
+func arrowTimestampToMicros(value int64, unit arrow.TimeUnit) (int64, error) {
+ switch unit {
+ case arrow.Second:
+ return scaleTimestamp(value, 1_000_000)
+ case arrow.Millisecond:
+ return scaleTimestamp(value, 1_000)
+ case arrow.Microsecond:
+ return value, nil
+ case arrow.Nanosecond:
+ return value / 1_000, nil
Review Comment:
Go's / truncates toward zero, but unit downconversion should floor. For
pre-epoch values this rounds the wrong way: ns=-1500 gives -1 here, but the
correct μs bin is -2 ([-2000, -1000)). Same class of bug this PR is fixing,
wrong partition routing for negative timestamps.
Try
```
case arrow.Nanosecond:
return math.FloorDiv(value, 1_000), nil
```
Can you please add a regression test with a negative ns value (e.g. one
second before epoch with a sub-μs offset) asserting the partition path.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]