aihuaxu commented on code in PR #13137:
URL: https://github.com/apache/iceberg/pull/13137#discussion_r2280549899
##########
api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java:
##########
@@ -140,6 +144,15 @@ static SerializedShortString createShortString(String
string) {
return SerializedShortString.from(buffer, buffer.get(0));
}
+ public static VariantPrimitive<?> createSerializedPrimitive(int
primitiveType, byte[] bytes) {
+ byte[] header = new byte[1];
+ header[0] = primitiveHeader(primitiveType);
+ byte[] primitives = new byte[bytes.length + header.length];
+ System.arraycopy(header, 0, primitives, 0, header.length);
+ System.arraycopy(bytes, 0, primitives, header.length, bytes.length);
+ return SerializedPrimitive.from(primitives);
+ }
Review Comment:
Probably can implement using byteBuffer:
```
public static VariantPrimitive<?> createSerializedPrimitive(
int primitiveType, byte.[] bytes) {
byte header = primitiveHeader(primitiveType);
ByteBuffer buffer = ByteBuffer.allocate(1 +
bytes.length).order(ByteOrder.LITTLE_ENDIAN);
buffer.put(0, header);
buffer.position(1);
buffer.put(bytes);
buffer.flip();
return SerializedPrimitive.from(buffer, header);
}
```
##########
api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java:
##########
@@ -1050,6 +1063,248 @@ public void testSelectsPartitions() {
.isFalse();
}
+ @Test
+ public void testSanitizeVariantArray() {
+ Expression bound =
+ Binder.bind(
+ STRUCT,
+ equal("var", Variant.of(VariantMetadata.empty(),
createArrayWithNestedTypes())));
+ assertEquals(
+ Expressions.equal(
+ "var",
+ "[(hash-2024a117), {(hash-a): (2-digit-INT8)}, [(2-digit-INT8),
(timestamp), (date)]]"),
+ ExpressionUtil.sanitize(bound));
+ }
+
+ @Test
+ public void testSanitizeVariantPrimitive() {
+ Expression int8Bound =
+ Binder.bind(
+ STRUCT,
+ equal(
+ "var",
+ Variant.of(
+ VariantMetadata.empty(),
+ VariantTestUtil.createSerializedPrimitive(3, new byte[]
{(byte) 32}))));
+ assertEquals(Expressions.equal("var", "(2-digit-INT8)"),
ExpressionUtil.sanitize(int8Bound));
+ Expression int16Bound =
+ Binder.bind(
+ STRUCT,
+ equal(
+ "var",
+ Variant.of(
+ VariantMetadata.empty(),
+ VariantTestUtil.createSerializedPrimitive(4, new byte[]
{(byte) 0xD2, 0x04}))));
+ assertEquals(Expressions.equal("var", "(4-digit-INT16)"),
ExpressionUtil.sanitize(int16Bound));
+ Expression doubleBound =
+ Binder.bind(
+ STRUCT,
+ equal(
+ "var",
+ Variant.of(
+ VariantMetadata.empty(),
+ VariantTestUtil.createSerializedPrimitive(
+ 7,
+ new byte[] {
+ (byte) 0xB1, 0x1C, 0x6C, (byte) 0xB1, (byte) 0xF4,
0x10, 0x22, 0x11
+ }))));
+ assertEquals(
+ Expressions.equal("var", "(-224-digit-DOUBLE)"),
ExpressionUtil.sanitize(doubleBound));
+
+ Expression timestamp =
+ Binder.bind(
+ STRUCT,
+ equal(
+ "var",
+ Variant.of(
+ VariantMetadata.empty(),
+ VariantTestUtil.createSerializedPrimitive(
+ 12,
+ new byte[] {
+ 0x18, (byte) 0xD3, (byte) 0xB1, (byte) 0xD6, 0x07,
0x57, 0x05, 0x00
+ }))));
+ assertEquals(Expressions.equal("var", "(timestamp)"),
ExpressionUtil.sanitize(timestamp));
+ }
+
+ @Test
+ public void testSanitizeVariantObject() {
+ Map<String, VariantValue> data = new HashMap<String, VariantValue>();
+ data.put("event_id_8", VariantTestUtil.createSerializedPrimitive(3, new
byte[] {(byte) 32}));
+ data.put(
+ "event_id_16",
+ VariantTestUtil.createSerializedPrimitive(4, new byte[] {(byte) 0xD2,
0x04}));
+ data.put(
+ "event_id_32",
+ VariantTestUtil.createSerializedPrimitive(
+ 5, new byte[] {(byte) 0xD2, 0x02, (byte) 0x96, 0x49}));
+ data.put(
+ "event_id_64",
+ VariantTestUtil.createSerializedPrimitive(
+ 6, new byte[] {(byte) 0xB1, 0x1C, 0x6C, (byte) 0xB1, (byte) 0xF4,
0x10, 0x22, 0x11}));
+ data.put("event_name", VariantTestUtil.createString("test"));
+ data.put(
+ "event_float",
+ VariantTestUtil.createSerializedPrimitive(
+ 14, new byte[] {(byte) 0xD2, 0x02, (byte) 0x96, 0x49}));
+ data.put(
+ "event_double",
+ VariantTestUtil.createSerializedPrimitive(
+ 7, new byte[] {(byte) 0xB1, 0x1C, 0x6C, (byte) 0xB1, (byte) 0xF4,
0x10, 0x22, 0x11}));
+ byte[] empty = new byte[0];
+ data.put("event_bool_t", VariantTestUtil.createSerializedPrimitive(1,
empty));
+ data.put("event_bool_f", VariantTestUtil.createSerializedPrimitive(2,
empty));
+ data.put(
+ "event_id_dec4",
+ VariantTestUtil.createSerializedPrimitive(
+ 8, new byte[] {0x04, (byte) 0xD2, 0x02, (byte) 0x96, 0x49}));
+ data.put(
+ "event_id_dec8", // scale=9
+ VariantTestUtil.createSerializedPrimitive(
+ 9,
+ new byte[] {
+ 0x09, (byte) 0xB1, 0x1C, 0x6C, (byte) 0xB1, (byte) 0xF4, 0x10,
0x22, 0x11
+ }));
+ data.put(
+ "event_id_dec16", // scale=9
+ VariantTestUtil.createSerializedPrimitive(
+ 10,
+ new byte[] {
+ 0x09,
+ 0x15,
+ 0x71,
+ 0x34,
+ (byte) 0xB0,
+ (byte) 0xB8,
+ (byte) 0x87,
+ 0x10,
+ (byte) 0x89,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00,
+ 0x00
+ }));
+ data.put(
+ "event_date",
+ VariantTestUtil.createSerializedPrimitive(11, new byte[] {(byte) 0xF4,
0x43, 0x00, 0x00}));
+ data.put("event_timestamp_tz", createTimestamp(12));
+ data.put("event_timestamp_ntz", createTimestamp(13));
+ data.put(
+ "event_time",
+ VariantTestUtil.createSerializedPrimitive(
+ 17,
+ new byte[] {
+ (byte) 0x80,
+ (byte) 0xa8,
+ (byte) 0x4b,
+ (byte) 0xb7,
+ (byte) 0x02,
+ (byte) 0x00,
+ (byte) 0x00,
+ 0x00
+ }));
+ data.put("event_timestamp_tz_nanos", createTimestampNanos(18));
+ data.put("event_timestamp_ntz_nanos", createTimestampNanos(19));
+ data.put(
+ "event_uuid",
+ VariantTestUtil.createSerializedPrimitive(
+ 20,
+ new byte[] {
+ (byte) 0xf2,
+ 0x4f,
+ (byte) 0x9b,
+ 0x64,
+ (byte) 0x81,
+ (byte) 0xfa,
+ 0x49,
+ (byte) 0xd1,
+ (byte) 0xb7,
+ 0x4e,
+ (byte) 0x8c,
+ 0x09,
+ (byte) 0xa6,
+ (byte) 0xe3,
+ 0x1c,
+ 0x56
+ }));
+ data.put(
+ "event_binary",
+ VariantTestUtil.createSerializedPrimitive(
+ 15, new byte[] {0x05, 0x00, 0x00, 0x00, 'a', 'b', 'c', 'd', 'e'}));
+
+ data.put("event_array", createArrayWithNestedTypes());
+ Variant value = VariantTestUtil.variant(data);
+ Expression bound = Binder.bind(STRUCT, equal("var", value));
+ assertEquals(
+ Expressions.equal(
+ "var",
+ "{(hash-event_array): "
+ + "[(hash-2024a117), {(hash-event_array): (2-digit-INT8)},
[(2-digit-INT8), (timestamp), (date)]], (hash-event_binary): (hash-04584bd6), "
+ + "(hash-event_bool_f): (hash-3682a0b4), "
+ + "(hash-event_bool_t): (hash-451a5465), "
+ + "(hash-event_date): (date), "
+ + "(hash-event_double): (-224-digit-DOUBLE), "
+ + "(hash-event_float): (7-digit-FLOAT), "
+ + "(hash-event_id_16): (4-digit-INT16), "
+ + "(hash-event_id_32): (10-digit-INT32), "
+ + "(hash-event_id_64): (19-digit-INT64), "
+ + "(hash-event_id_8): (2-digit-INT8), "
+ + "(hash-event_id_dec16): (10-digit-DECIMAL16), "
+ + "(hash-event_id_dec4): (6-digit-DECIMAL4), "
+ + "(hash-event_id_dec8): (10-digit-DECIMAL8), "
+ + "(hash-event_name): (hash-79b17dd6), "
+ + "(hash-event_time): (time), "
+ + "(hash-event_timestamp_ntz): (timestamp), "
+ + "(hash-event_timestamp_ntz_nanos): (timestamp), "
+ + "(hash-event_timestamp_tz): (timestamp), "
+ + "(hash-event_timestamp_tz_nanos): (timestamp), "
+ + "(hash-event_uuid): (hash-54e07fa7)}"),
+ ExpressionUtil.sanitize(bound));
+ }
+
+ private VariantPrimitive<?> createTimestampNanos(int primitiveHeader) {
Review Comment:
You can change this to static method. Same for the following two.
##########
api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java:
##########
@@ -419,7 +427,7 @@ public <T> String predicate(BoundPredicate<T> pred) {
.map(lit -> sanitize(pred.term().type(), lit,
nowMicros, today))
.collect(Collectors.toList()))
.stream()
- .collect(Collectors.joining(", ", "(", ")"));
Review Comment:
Can you revert this change? Seems unnecessary.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]