aihuaxu commented on code in PR #13137:
URL: https://github.com/apache/iceberg/pull/13137#discussion_r2229003818


##########
api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java:
##########
@@ -646,6 +655,92 @@ private static String sanitizeSimpleString(CharSequence 
value) {
     return String.format(Locale.ROOT, "(hash-%08x)", HASH_FUNC.apply(value));
   }
 
+  private static String sanitizeVariant(Variant value, long now, int today) {
+    return sanitizeVariant(value.value(), now, today);
+  }
+
+  private static String sanitizeVariant(VariantValue value, long now, int 
today) {
+    if (value instanceof VariantObject) {
+      return sanitizeVariantObject(value.asObject(), now, today);
+    } else if (value instanceof VariantPrimitive) {
+      return sanitizeVariantValue(value.asPrimitive(), value.type(), now, 
today);
+    } else {
+      return sanitizeVariantArray(value.asArray(), now, today);
+    }
+  }
+
+  private static String sanitizeVariantObject(VariantObject value, long now, 
int today) {
+    StringBuilder builder = new StringBuilder();
+    builder.append("{");
+    boolean first = true;
+    for (String field : value.fieldNames()) {
+      if (first) {
+        first = false;
+      } else {
+        builder.append(", ");
+      }
+      builder.append(String.format(Locale.ROOT, "(hash-%s)", field)).append(": 
");
+      VariantValue fieldValue = value.get(field);
+      PhysicalType fieldType = fieldValue.type();
+      builder.append(sanitizeVariantValue(fieldValue, fieldType, now, today));
+    }
+    builder.append("}");
+    return builder.toString();
+  }
+
+  private static String sanitizeVariantArray(VariantArray value, long now, int 
today) {
+    StringBuilder builder = new StringBuilder();
+    builder.append("{");

Review Comment:
   I guess we should use '[' instead of '{' for array.



##########
api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java:
##########
@@ -646,6 +655,92 @@ private static String sanitizeSimpleString(CharSequence 
value) {
     return String.format(Locale.ROOT, "(hash-%08x)", HASH_FUNC.apply(value));
   }
 
+  private static String sanitizeVariant(Variant value, long now, int today) {
+    return sanitizeVariant(value.value(), now, today);
+  }
+
+  private static String sanitizeVariant(VariantValue value, long now, int 
today) {
+    if (value instanceof VariantObject) {
+      return sanitizeVariantObject(value.asObject(), now, today);
+    } else if (value instanceof VariantPrimitive) {
+      return sanitizeVariantValue(value.asPrimitive(), value.type(), now, 
today);
+    } else {
+      return sanitizeVariantArray(value.asArray(), now, today);
+    }
+  }
+
+  private static String sanitizeVariantObject(VariantObject value, long now, 
int today) {
+    StringBuilder builder = new StringBuilder();
+    builder.append("{");
+    boolean first = true;
+    for (String field : value.fieldNames()) {
+      if (first) {
+        first = false;
+      } else {
+        builder.append(", ");
+      }
+      builder.append(String.format(Locale.ROOT, "(hash-%s)", field)).append(": 
");
+      VariantValue fieldValue = value.get(field);
+      PhysicalType fieldType = fieldValue.type();
+      builder.append(sanitizeVariantValue(fieldValue, fieldType, now, today));
+    }
+    builder.append("}");
+    return builder.toString();
+  }
+
+  private static String sanitizeVariantArray(VariantArray value, long now, int 
today) {
+    StringBuilder builder = new StringBuilder();
+    builder.append("{");
+    boolean first = true;
+    for (int i = 0; i < value.numElements(); i++) {
+      if (first) {
+        first = false;
+      } else {
+        builder.append(", ");
+      }
+      builder.append(sanitizeVariantValue(value.get(i), value.get(i).type(), 
now, today));
+    }
+    builder.append("}");
+    return builder.toString();
+  }
+
+  private static String sanitizeVariantValue(
+      VariantValue fieldValue, PhysicalType fieldType, long now, int today) {
+    StringBuilder builder = new StringBuilder();
+    switch (fieldType) {
+      case INT8:
+      case INT16:
+      case INT32:
+      case INT64:
+      case FLOAT:
+      case DOUBLE:
+      case DECIMAL4:
+      case DECIMAL8:
+      case DECIMAL16:
+        builder.append(sanitizeNumber((Number) fieldValue.asPrimitive().get(), 
fieldType.name()));
+        break;
+      case DATE:
+        builder.append(sanitizeDate(((Number) 
fieldValue.asPrimitive().get()).intValue(), today));
+        break;
+      case TIMESTAMPTZ:
+      case TIMESTAMPNTZ:
+      case TIMESTAMPTZ_NANOS:
+      case TIMESTAMPNTZ_NANOS:
+        builder.append(
+            sanitizeTimestamp(((Number) 
fieldValue.asPrimitive().get()).longValue(), now));
+        break;
+      case TIME:
+        return "(time)";
+      case ARRAY:

Review Comment:
   We are missing `case OBJECT` here so if we have nested object, then it's not 
handled. 



##########
api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java:
##########
@@ -1050,6 +1059,194 @@ public void testSelectsPartitions() {
         .isFalse();
   }
 
+  @Test
+  public void testSanitizeVariantArray() {

Review Comment:
   Can you add nested array and nested object in the array for testing?



##########
api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java:
##########
@@ -107,6 +126,28 @@ static SerializedPrimitive createString(String string) {
     return SerializedPrimitive.from(buffer, buffer.get(0));
   }
 
+  public static SerializedArray createMixedArray() {

Review Comment:
   I think we can use the existing createArray() to pass in the elements rather 
than add a new method?



##########
api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java:
##########
@@ -107,6 +126,28 @@ static SerializedPrimitive createString(String string) {
     return SerializedPrimitive.from(buffer, buffer.get(0));
   }
 
+  public static SerializedArray createMixedArray() {
+    ByteBuffer nestedBuffer = VariantTestUtil.createArray(A, C, D);
+    SerializedArray nested =
+        SerializedArray.from(EMPTY_METADATA, nestedBuffer, 
nestedBuffer.get(0));
+    ByteBuffer buffer =
+        VariantTestUtil.createArray(DATE, I34, STR, nested, NULL, E, B, FALSE, 
TRUE, I1234);
+    return SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0));
+  }
+
+  static byte primitiveHeader(int primitiveType) {

Review Comment:
   Why do we remove private? Seems it's only used here.



##########
api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java:
##########
@@ -1050,6 +1059,194 @@ public void testSelectsPartitions() {
         .isFalse();
   }
 
+  @Test
+  public void testSanitizeVariantArray() {
+    Expression bound =
+        Binder.bind(
+            STRUCT,
+            equal("var", Variant.of(VariantMetadata.empty(), 
VariantTestUtil.createMixedArray())));
+    assertEquals(
+        Expressions.equal(
+            "var",
+            "{(date), (2-digit-INT8), (hash-2024a117), {(hash-7b813b77), 
(hash-512b3ddf), (hash-36f6eef4)}, (hash-6926fdc4), "
+                + "(hash-06eb5017), (hash-64f79f23), (hash-3682a0b4), 
(hash-451a5465), (4-digit-INT16)}"),
+        ExpressionUtil.sanitize(bound));
+  }
+
+  @Test
+  public void testSanitizeVariantPrimitive() {
+    Expression int8Bound =
+        Binder.bind(
+            STRUCT,
+            equal(
+                "var",
+                Variant.of(
+                    VariantMetadata.empty(),
+                    VariantTestUtil.createSerializedPrimitive(3, new byte[] 
{(byte) 32}))));
+    assertEquals(Expressions.equal("var", "(2-digit-INT8)"), 
ExpressionUtil.sanitize(int8Bound));
+    Expression int16Bound =
+        Binder.bind(
+            STRUCT,
+            equal(
+                "var",
+                Variant.of(
+                    VariantMetadata.empty(),
+                    VariantTestUtil.createSerializedPrimitive(4, new byte[] 
{(byte) 0xD2, 0x04}))));
+    assertEquals(Expressions.equal("var", "(4-digit-INT16)"), 
ExpressionUtil.sanitize(int16Bound));
+    Expression doubleBound =
+        Binder.bind(
+            STRUCT,
+            equal(
+                "var",
+                Variant.of(
+                    VariantMetadata.empty(),
+                    VariantTestUtil.createSerializedPrimitive(
+                        7,
+                        new byte[] {
+                          (byte) 0xB1, 0x1C, 0x6C, (byte) 0xB1, (byte) 0xF4, 
0x10, 0x22, 0x11
+                        }))));
+    assertEquals(
+        Expressions.equal("var", "(-224-digit-DOUBLE)"), 
ExpressionUtil.sanitize(doubleBound));
+
+    Expression timestamp =
+        Binder.bind(
+            STRUCT,
+            equal(
+                "var",
+                Variant.of(
+                    VariantMetadata.empty(),
+                    VariantTestUtil.createSerializedPrimitive(
+                        12,
+                        new byte[] {
+                          0x18, (byte) 0xD3, (byte) 0xB1, (byte) 0xD6, 0x07, 
0x57, 0x05, 0x00
+                        }))));
+    assertEquals(Expressions.equal("var", "(timestamp)"), 
ExpressionUtil.sanitize(timestamp));
+  }
+
+  @Test
+  public void testSanitizeVariantObject() {

Review Comment:
   Can you add a nested object in the object for testing?



##########
api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java:
##########
@@ -646,6 +655,92 @@ private static String sanitizeSimpleString(CharSequence 
value) {
     return String.format(Locale.ROOT, "(hash-%08x)", HASH_FUNC.apply(value));
   }
 
+  private static String sanitizeVariant(Variant value, long now, int today) {
+    return sanitizeVariant(value.value(), now, today);
+  }
+
+  private static String sanitizeVariant(VariantValue value, long now, int 
today) {
+    if (value instanceof VariantObject) {
+      return sanitizeVariantObject(value.asObject(), now, today);
+    } else if (value instanceof VariantPrimitive) {
+      return sanitizeVariantValue(value.asPrimitive(), value.type(), now, 
today);
+    } else {
+      return sanitizeVariantArray(value.asArray(), now, today);
+    }
+  }
+
+  private static String sanitizeVariantObject(VariantObject value, long now, 
int today) {
+    StringBuilder builder = new StringBuilder();
+    builder.append("{");
+    boolean first = true;
+    for (String field : value.fieldNames()) {
+      if (first) {
+        first = false;
+      } else {
+        builder.append(", ");
+      }
+      builder.append(String.format(Locale.ROOT, "(hash-%s)", field)).append(": 
");
+      VariantValue fieldValue = value.get(field);
+      PhysicalType fieldType = fieldValue.type();
+      builder.append(sanitizeVariantValue(fieldValue, fieldType, now, today));
+    }
+    builder.append("}");
+    return builder.toString();
+  }
+
+  private static String sanitizeVariantArray(VariantArray value, long now, int 
today) {
+    StringBuilder builder = new StringBuilder();
+    builder.append("{");

Review Comment:
   Also you can simply the code using the following syntax:
   
   `list.stream().collect(Collectors.joining(", ", "[", "]"));`



##########
api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java:
##########
@@ -98,7 +117,7 @@ private static int writeBufferAbsolute(ByteBuffer buffer, 
int offset, ByteBuffer
   }
 
   /** Creates a random string primitive of the given length for forcing large 
offset sizes */
-  static SerializedPrimitive createString(String string) {
+  public static SerializedPrimitive createString(String string) {

Review Comment:
   We are exposing SerializedPrimitive, SerializedArray as public while they 
are package scope.
   
   Class 'SerializedPrimitive' is exposed outside its defined visibility scope.
   
   I think we can return VariantPrimitive<?>, VariantArray instead.  



##########
api/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java:
##########
@@ -107,6 +126,28 @@ static SerializedPrimitive createString(String string) {
     return SerializedPrimitive.from(buffer, buffer.get(0));
   }
 
+  public static SerializedArray createMixedArray() {
+    ByteBuffer nestedBuffer = VariantTestUtil.createArray(A, C, D);
+    SerializedArray nested =
+        SerializedArray.from(EMPTY_METADATA, nestedBuffer, 
nestedBuffer.get(0));
+    ByteBuffer buffer =
+        VariantTestUtil.createArray(DATE, I34, STR, nested, NULL, E, B, FALSE, 
TRUE, I1234);
+    return SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0));
+  }
+
+  static byte primitiveHeader(int primitiveType) {
+    return (byte) (primitiveType << 2);
+  }
+
+  public static SerializedPrimitive createSerializedPrimitive(int 
primitiveType, byte[] bytes) {

Review Comment:
   We can call createPrimitive() and return VariantPrimitive<?> instead.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to