RussellSpitzer commented on code in PR #13284:
URL: https://github.com/apache/iceberg/pull/13284#discussion_r2190648642


##########
api/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java:
##########
@@ -182,66 +182,81 @@ public void testMixedValueTypes() {
     assertThat(actualInner.get("f").asPrimitive().get()).isEqualTo((byte) 3);
   }
 
-  @Test
-  public void testTwoByteOffsets() {
-    // a string larger than 255 bytes to push the value offset size above 1 
byte
-    String randomString = RandomUtil.generateString(300, random);
+  @ParameterizedTest
+  @ValueSource(
+      ints = {
+        300, // a big string larger than 255 bytes to push the value offset 
size above 1 byte to
+        // test TwoByteOffsets
+        70_000 // a really-big string larger than 65535 bytes to push the 
value offset size above 1
+        // byte to test ThreeByteOffsets
+      })
+  public void testMultiByteOffsets(int multiByteOffset) {
+    String randomString = RandomUtil.generateString(multiByteOffset, random);
     SerializedPrimitive bigString = VariantTestUtil.createString(randomString);
 
     // note that order doesn't matter. fields are sorted by name
-    Map<String, VariantValue> data = ImmutableMap.of("big", bigString, "a", 
I1, "b", I2, "c", I3);
+    Map<String, VariantValue> data =
+        ImmutableMap.of(
+            "small",
+            VariantTestUtil.createShortString("iceberg"),
+            "big",
+            bigString,
+            "a",
+            I1,
+            "b",
+            I2,
+            "c",
+            I3);
     ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* 
sort names */);
     ByteBuffer value = VariantTestUtil.createObject(meta, data);
 
     VariantMetadata metadata = VariantMetadata.from(meta);
     SerializedObject object = SerializedObject.from(metadata, value, 
value.get(0));
 
     assertThat(object.type()).isEqualTo(PhysicalType.OBJECT);
-    assertThat(object.numFields()).isEqualTo(4);
+    assertThat(object.numFields()).isEqualTo(5);
 
     assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8);
     assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1);
     assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8);
     assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2);
     assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8);
     assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3);
-    assertThat(object.get("big").type()).isEqualTo(PhysicalType.STRING);
-    assertThat(object.get("big").asPrimitive().get()).isEqualTo(randomString);
+    VariantTestUtil.assertVariantString(object.get("big"), randomString);
+    VariantTestUtil.assertVariantString(object.get("small"), "iceberg");
   }
 
-  @Test
-  public void testThreeByteOffsets() {
-    // a string larger than 65535 bytes to push the value offset size above 1 
byte
-    String randomString = RandomUtil.generateString(70_000, random);
-    SerializedPrimitive reallyBigString = 
VariantTestUtil.createString(randomString);
+  @ParameterizedTest
+  @ValueSource(booleans = {true, false})
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  public void testLargeObject(boolean sortFieldNames) {
+    Map<String, VariantPrimitive<?>> fields = Maps.newHashMap();
+    for (int i = 0; i < 10_000; i += 1) {
+      fields.put(
+          RandomUtil.generateString(10, random),
+          VariantTestUtil.createShortString(RandomUtil.generateString(10, 
random)));
+    }
 
-    // note that order doesn't matter. fields are sorted by name
-    Map<String, VariantValue> data =
-        ImmutableMap.of("really-big", reallyBigString, "a", I1, "b", I2, "c", 
I3);
-    ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* 
sort names */);
-    ByteBuffer value = VariantTestUtil.createObject(meta, data);
+    ByteBuffer meta = VariantTestUtil.createMetadata(fields.keySet(), 
sortFieldNames);
+    ByteBuffer value = VariantTestUtil.createObject(meta, (Map) fields);
 
     VariantMetadata metadata = VariantMetadata.from(meta);
     SerializedObject object = SerializedObject.from(metadata, value, 
value.get(0));
 
     assertThat(object.type()).isEqualTo(PhysicalType.OBJECT);
-    assertThat(object.numFields()).isEqualTo(4);
+    assertThat(object.numFields()).isEqualTo(10_000);
 
-    assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8);
-    assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1);
-    assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8);
-    assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2);
-    assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8);
-    assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3);
-    assertThat(object.get("really-big").type()).isEqualTo(PhysicalType.STRING);
-    
assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString);
+    for (Map.Entry<String, VariantPrimitive<?>> entry : fields.entrySet()) {
+      VariantValue fieldValue = object.get(entry.getKey());
+      VariantTestUtil.assertVariantString(fieldValue, 
entry.getValue().get().toString());
+    }
   }
 
   @ParameterizedTest
   @ValueSource(booleans = {true, false})
   @SuppressWarnings({"unchecked", "rawtypes"})
-  public void testLargeObject(boolean sortFieldNames) {
-    Map<String, SerializedPrimitive> fields = Maps.newHashMap();
+  public void testLargeObjectUsingShortStringWithBigHeader(boolean 
sortFieldNames) {
+    Map<String, VariantPrimitive<?>> fields = Maps.newHashMap();

Review Comment:
   Should the type here and below be Serialized Primitive? Since we are trying 
to make sure we only working with primitives? 
   
   I think as I noted above we should change this test to one that is 
explicitly making short Primitives (not short strings) and making sure we can 
read them correctly.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to