This is an automated email from the ASF dual-hosted git repository.

tingchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 447c518712 Remove emitting null value fields during data transformation for SchemaConformingTransformer (#14351)
447c518712 is described below

commit 447c5187129b830d819eac4db956e791674875bf
Author: Ting Chen <tingc...@uber.com>
AuthorDate: Tue Nov 12 10:37:25 2024 -0800

    Remove emitting null value fields during data transformation for SchemaConformingTransformer (#14351)
    
    * Remove emitting null value fields during data transformation.
    
    * Fix lint issues.
    
    * Revise based on comments
---
 .../SchemaConformingTransformerV2.java             |   7 +-
 .../SchemaConformingTransformerV2Test.java         | 215 +++++++++------------
 2 files changed, 94 insertions(+), 128 deletions(-)

diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2.java
index 923b49625c..2aed00f0c3 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2.java
@@ -92,7 +92,6 @@ import org.slf4j.LoggerFactory;
  * <pre>
  * {
  *   "a": 1,
- *   "c": null,
  *   "c.d": 3,
  *   "json_data": {
  *     "b": "2",
@@ -400,6 +399,9 @@ public class SchemaConformingTransformerV2 implements 
RecordTransformer {
     SchemaTreeNode currentNode = parentNode == null ? null : 
parentNode.getChild(key);
     String unindexableFieldSuffix = 
_transformerConfig.getUnindexableFieldSuffix();
     isIndexable = isIndexable && (null == unindexableFieldSuffix || 
!key.endsWith(unindexableFieldSuffix));
+    if (value == null) {
+      return extraFieldsContainer;
+    }
     if (!(value instanceof Map)) {
       // leaf node
       if (!isIndexable) {
@@ -413,7 +415,8 @@ public class SchemaConformingTransformerV2 implements 
RecordTransformer {
           }
           mergedTextIndexMap.put(keyJsonPath, value);
         } else {
-          // Out of schema
+          // The field is not mapped to one of the dedicated columns in the 
Pinot table schema. Thus it will be put
+          // into the extraField column of the table.
           if (storeIndexableExtras) {
             extraFieldsContainer.addIndexableEntry(key, value);
             mergedTextIndexMap.put(keyJsonPath, value);
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2Test.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2Test.java
index 6ea6d66cf9..d004f703f6 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2Test.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2Test.java
@@ -80,17 +80,22 @@ public class SchemaConformingTransformerV2Test {
   private static final CustomObjectNode TEST_JSON_MAP_NODE =
       CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, 
TEST_JSON_ARRAY_NODE)
           .set(TEST_JSON_NULL_FIELD_NAME, 
TEST_JSON_NULL_NODE).set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE);
+  private static final CustomObjectNode TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD =
+      CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, 
TEST_JSON_ARRAY_NODE)
+          .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE);
+
   private static final CustomObjectNode TEST_JSON_MAP_NO_IDX_NODE =
       CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, 
TEST_INT_NODE)
           .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE);
   private static final CustomObjectNode TEST_JSON_MAP_NODE_WITH_NO_IDX =
       CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, 
TEST_JSON_ARRAY_NODE)
-          .set(TEST_JSON_NULL_FIELD_NAME, 
TEST_JSON_NULL_NODE).set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
-          .set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE)
+          .set(TEST_JSON_STRING_FIELD_NAME, 
TEST_JSON_STRING_NODE).set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE)
           .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE);
+
   static {
     ServerMetrics.register(mock(ServerMetrics.class));
   }
+
   private static final SchemaConformingTransformerV2 _RECORD_TRANSFORMER =
       new SchemaConformingTransformerV2(createDefaultBasicTableConfig(), 
createDefaultSchema());
 
@@ -131,20 +136,16 @@ public class SchemaConformingTransformerV2Test {
     /*
     {
       "arrayField" : [ 0, 1, 2, 3 ],
-      "nullField" : null,
       "stringField" : "a",
       "mapField" : {
         "arrayField" : [ 0, 1, 2, 3 ],
-        "nullField" : null,
         "stringField" : "a"
       },
       "nestedField" : {
         "arrayField" : [ 0, 1, 2, 3 ],
-        "nullField" : null,
         "stringField" : "a",
         "mapField" : {
           "arrayField" : [ 0, 1, 2, 3 ],
-          "nullField" : null,
           "stringField" : "a"
         }
       }
@@ -163,20 +164,16 @@ public class SchemaConformingTransformerV2Test {
     {
       "json_data" : {
         "arrayField" : [ 0, 1, 2, 3 ],
-        "nullField" : null,
         "stringField" : "a",
         "mapField" : {
           "arrayField" : [ 0, 1, 2, 3 ],
-          "nullField" : null,
           "stringField" : "a"
         },
         "nestedField" : {
           "arrayField" : [ 0, 1, 2, 3 ],
-          "nullField" : null,
           "stringField" : "a",
           "mapField" : {
             "arrayField" : [ 0, 1, 2, 3 ],
-            "nullField" : null,
             "stringField" : "a"
           }
         }
@@ -184,7 +181,14 @@ public class SchemaConformingTransformerV2Test {
     }
     */
     schema = createDefaultSchemaBuilder().build();
-    expectedJsonNode = 
CustomObjectNode.create().set(INDEXABLE_EXTRAS_FIELD_NAME, inputJsonNode);
+    // The input json node stripped of null fields.
+    final CustomObjectNode inputJsonNodeWithoutNullFields =
+        CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)
+            .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD).set(TEST_JSON_NESTED_MAP_FIELD_NAME,
+                
CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)
+                    .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD));
+
+    expectedJsonNode = 
CustomObjectNode.create().set(INDEXABLE_EXTRAS_FIELD_NAME, 
inputJsonNodeWithoutNullFields);
     transformWithIndexableFields(schema, inputJsonNode, expectedJsonNode);
 
     // Three dedicated columns in schema, only two are populated, one ignored
@@ -195,17 +199,13 @@ public class SchemaConformingTransformerV2Test {
       "<indexableExtras>":{
         "mapField": {
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "stringField":"a"
         },
-        "nullField":null,
         "stringField":"a",
         "nestedFields":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "mapField":{
             "arrayField":[0, 1, 2, 3],
-            "nullField":null,
             "stringField":"a"
           }
         }
@@ -218,33 +218,29 @@ public class SchemaConformingTransformerV2Test {
         .build();
     expectedJsonNode = 
CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE)
         .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + 
TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
-
-        .set(INDEXABLE_EXTRAS_FIELD_NAME, 
CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE)
-            
.setAll(TEST_JSON_MAP_NODE.deepCopy().removeAndReturn(TEST_JSON_ARRAY_FIELD_NAME))
-            .set(TEST_JSON_NESTED_MAP_FIELD_NAME, CustomObjectNode.create()
-                
.setAll(TEST_JSON_MAP_NODE.deepCopy().removeAndReturn(TEST_JSON_STRING_FIELD_NAME))
-                .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE)));
+        .set(INDEXABLE_EXTRAS_FIELD_NAME,
+            CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)
+                
.setAll(TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD.deepCopy().removeAndReturn(TEST_JSON_ARRAY_FIELD_NAME))
+                .set(TEST_JSON_NESTED_MAP_FIELD_NAME, 
CustomObjectNode.create().setAll(
+                        
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD.deepCopy().removeAndReturn(TEST_JSON_STRING_FIELD_NAME))
+                    .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)));
     transformWithIndexableFields(schema, inputJsonNode, expectedJsonNode);
 
     // 8 dedicated columns, only 6 are populated
     /*
     {
       "arrayField" : [ 0, 1, 2, 3 ],
-      "nullField" : null,
       "stringField" : "a",
       "nestedField.arrayField" : [ 0, 1, 2, 3 ],
-      "nestedField.nullField" : null,
       "nestedField.stringField" : "a",
       "json_data" : {
         "mapField" : {
           "arrayField" : [ 0, 1, 2, 3 ],
-          "nullField" : null,
           "stringField" : "a"
         },
         "nestedField" : {
           "mapField" : {
             "arrayField" : [ 0, 1, 2, 3 ],
-            "nullField" : null,
             "stringField" : "a"
           }
         }
@@ -260,13 +256,13 @@ public class SchemaConformingTransformerV2Test {
         .addSingleValueDimension(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + 
TEST_JSON_STRING_FIELD_NAME, DataType.STRING)
         .addSingleValueDimension(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + 
TEST_JSON_MAP_FIELD_NAME, DataType.JSON)
         .build();
-    expectedJsonNode = CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE)
+    expectedJsonNode = 
CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)
         .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + 
TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE)
-        .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + 
TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE)
         .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + 
TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
-        .set(INDEXABLE_EXTRAS_FIELD_NAME, 
CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE)
-            .set(TEST_JSON_NESTED_MAP_FIELD_NAME,
-                CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE)));
+        .set(INDEXABLE_EXTRAS_FIELD_NAME,
+            CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)
+                .set(TEST_JSON_NESTED_MAP_FIELD_NAME,
+                    CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)));
     transformWithIndexableFields(schema, inputJsonNode, expectedJsonNode);
   }
 
@@ -275,31 +271,26 @@ public class SchemaConformingTransformerV2Test {
     /*
     {
       "arrayField":[0, 1, 2, 3],
-      "nullField":null,
       "stringField":"a",
       "intField_noIndex":9,
       "string_noIndex":"z",
       "mapField":{
         "arrayField":[0, 1, 2, 3],
-        "nullField":null,
         "stringField":"a",
         "intField_noIndex":9,
         "string_noIndex":"z"
       },
       "mapField_noIndex":{
         "arrayField":[0, 1, 2, 3],
-        "nullField":null,
         "stringField":"a",
       },
       "nestedFields":{
         "arrayField":[0, 1, 2, 3],
-        "nullField":null,
         "stringField":"a",
         "intField_noIndex":9,
         "string_noIndex":"z",
         "mapField":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "stringField":"a",
           "intField_noIndex":9,
           "string_noIndex":"z"
@@ -314,12 +305,13 @@ public class SchemaConformingTransformerV2Test {
             .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
             .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX)
             .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, 
TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME,
-            
CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE).set(TEST_JSON_ARRAY_FIELD_NAME,
 TEST_JSON_ARRAY_NODE)
-                .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE)
-                .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
-                .set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE)
-                .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
-                .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITH_NO_IDX));
+                
CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE).set(TEST_JSON_ARRAY_FIELD_NAME,
+                        TEST_JSON_ARRAY_NODE)
+                    .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE)
+                    .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
+                    .set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE)
+                    .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
+                    .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITH_NO_IDX));
 
     CustomObjectNode expectedJsonNode;
     CustomObjectNode expectedJsonNodeWithMergedTextIndex;
@@ -331,20 +323,16 @@ public class SchemaConformingTransformerV2Test {
     {
       "indexableExtras":{
         "arrayField":[0, 1, 2, 3],
-        "nullField":null,
         "stringField":"a",
         "mapField":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "stringField":"a"
         },
         "nestedFields":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "stringField":"a",
           "mapField":{
             "arrayField":[0, 1, 2, 3],
-            "nullField":null,
             "stringField":"a"
           }
         }
@@ -358,7 +346,6 @@ public class SchemaConformingTransformerV2Test {
         },
         "mapField_noIndex":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "stringField":"a",
         },
         "nestedFields":{
@@ -376,22 +363,22 @@ public class SchemaConformingTransformerV2Test {
     }
     */
     expectedJsonNode = 
CustomObjectNode.create().set(INDEXABLE_EXTRAS_FIELD_NAME,
-        CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, 
TEST_JSON_ARRAY_NODE)
-            .set(TEST_JSON_NULL_FIELD_NAME, 
TEST_JSON_NULL_NODE).set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
-            .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME,
             CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, 
TEST_JSON_ARRAY_NODE)
-                .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE)
                 .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
-                .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE)))
-
+                .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)
+                .set(TEST_JSON_NESTED_MAP_FIELD_NAME,
+                    CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, 
TEST_JSON_ARRAY_NODE)
+                        .set(TEST_JSON_STRING_FIELD_NAME, 
TEST_JSON_STRING_NODE)
+                        .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)))
         .set(UNINDEXABLE_EXTRAS_FIELD_NAME,
             CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, 
TEST_INT_NODE)
                 .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
                 .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NO_IDX_NODE)
-                .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, 
TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME,
-                CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, 
TEST_INT_NODE)
-                    .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
-                    .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NO_IDX_NODE)));
+                .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)
+                .set(TEST_JSON_NESTED_MAP_FIELD_NAME,
+                    
CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE)
+                        .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
+                        .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NO_IDX_NODE)));
     transformWithUnIndexableFieldsAndMergedTextIndex(schemaBuilder.build(), 
inputJsonNode, expectedJsonNode);
 
     expectedJsonNodeWithMergedTextIndex = 
expectedJsonNode.deepCopy().set(MERGED_TEXT_INDEX_FIELD_NAME,
@@ -418,19 +405,15 @@ public class SchemaConformingTransformerV2Test {
       "arrayField":[0, 1, 2, 3],
       "nestedFields.stringField":"a",
       "indexableExtras":{
-        "nullField":null,
         "stringField":"a",
         "mapField":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "stringField":"a"
         },
         "nestedFields":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "mapField":{
             "arrayField":[0, 1, 2, 3],
-            "nullField":null,
             "stringField":"a"
           }
         }
@@ -444,7 +427,6 @@ public class SchemaConformingTransformerV2Test {
         },
         "mapField_noIndex":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "stringField":"a",
         },
         "nestedFields":{
@@ -463,21 +445,22 @@ public class SchemaConformingTransformerV2Test {
     */
     expectedJsonNode = 
CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE)
         .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + 
TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
-        .set(INDEXABLE_EXTRAS_FIELD_NAME, 
CustomObjectNode.create().set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE)
-            .set(TEST_JSON_STRING_FIELD_NAME, 
TEST_JSON_STRING_NODE).set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE)
-            .set(TEST_JSON_NESTED_MAP_FIELD_NAME,
-                CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, 
TEST_JSON_ARRAY_NODE)
-                    .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE)
-                    .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE)))
+        .set(INDEXABLE_EXTRAS_FIELD_NAME,
+            CustomObjectNode.create().set(TEST_JSON_STRING_FIELD_NAME, 
TEST_JSON_STRING_NODE)
+                .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)
+                .set(TEST_JSON_NESTED_MAP_FIELD_NAME,
+                    CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, 
TEST_JSON_ARRAY_NODE)
+                        .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)))
 
         .set(UNINDEXABLE_EXTRAS_FIELD_NAME,
             CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, 
TEST_INT_NODE)
                 .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
                 .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NO_IDX_NODE)
-                .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, 
TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME,
-                CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, 
TEST_INT_NODE)
-                    .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
-                    .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NO_IDX_NODE)));
+                .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)
+                .set(TEST_JSON_NESTED_MAP_FIELD_NAME,
+                    
CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE)
+                        .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
+                        .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NO_IDX_NODE)));
     transformWithUnIndexableFieldsAndMergedTextIndex(schemaBuilder.build(), 
inputJsonNode, expectedJsonNode);
 
     expectedJsonNodeWithMergedTextIndex = 
expectedJsonNode.deepCopy().set(MERGED_TEXT_INDEX_FIELD_NAME,
@@ -506,21 +489,17 @@ public class SchemaConformingTransformerV2Test {
     /*
     {
       "arrayField":[0, 1, 2, 3],
-      "nullField":null,
       "stringField":"a",
       "nestedFields.arrayField":[0, 1, 2, 3],
-      "nestedFields.nullField":null,
       "nestedFields.stringField":"a",
       "indexableExtras":{
         "mapField":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "stringField":"a"
         },
         "nestedFields":{
           mapField":{
             "arrayField":[0, 1, 2, 3],
-            "nullField":null,
             "stringField":"a"
           }
         }
@@ -534,7 +513,6 @@ public class SchemaConformingTransformerV2Test {
         },
         "mapField_noIndex":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "stringField":"a",
         },
         "nestedFields":{
@@ -552,23 +530,23 @@ public class SchemaConformingTransformerV2Test {
     }
     */
     expectedJsonNode = 
CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE)
-        .set(TEST_JSON_NULL_FIELD_NAME, 
TEST_JSON_NULL_NODE).set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
+        .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
         .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + 
TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE)
-        .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + 
TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE)
         .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + 
TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
-
-        .set(INDEXABLE_EXTRAS_FIELD_NAME, 
CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE)
-            .set(TEST_JSON_NESTED_MAP_FIELD_NAME,
-                CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE)))
+        .set(INDEXABLE_EXTRAS_FIELD_NAME,
+            CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)
+                .set(TEST_JSON_NESTED_MAP_FIELD_NAME,
+                    CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)))
 
         .set(UNINDEXABLE_EXTRAS_FIELD_NAME,
             CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, 
TEST_INT_NODE)
                 .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
                 .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NO_IDX_NODE)
-                .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, 
TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME,
-                CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, 
TEST_INT_NODE)
-                    .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
-                    .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NO_IDX_NODE)));
+                .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)
+                .set(TEST_JSON_NESTED_MAP_FIELD_NAME,
+                    
CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE)
+                        .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
+                        .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NO_IDX_NODE)));
     transformWithUnIndexableFieldsAndMergedTextIndex(schemaBuilder.build(), 
inputJsonNode, expectedJsonNode);
     expectedJsonNodeWithMergedTextIndex = 
expectedJsonNode.deepCopy().set(MERGED_TEXT_INDEX_FIELD_NAME,
         
N.arrayNode().add("[0,1,2,3]:arrayField").add("0:arrayField").add("1:arrayField").add("2:arrayField")
@@ -590,38 +568,32 @@ public class SchemaConformingTransformerV2Test {
     /*
     {
       "arrayField":[0, 1, 2, 3],
-      "nullField":null,
       "stringField":"a",
       "intField_noIndex":9,
       "string_noIndex":"z",
       "mapField":{
         "arrayField":[0, 1, 2, 3],
-        "nullField":null,
         "stringField":"a",
         "intField_noIndex":9,
         "string_noIndex":"z"
       },
       "mapFieldExtra":{
         "arrayField":[0, 1, 2, 3],
-        "nullField":null,
         "stringField":"a",
         "intField_noIndex":9,
         "string_noIndex":"z"
       },
       "mapField_noIndex":{
         "arrayField":[0, 1, 2, 3],
-        "nullField":null,
         "stringField":"a",
       },
       "nestedFields":{
         "arrayField":[0, 1, 2, 3],
-        "nullField":null,
         "stringField":"a",
         "intField_noIndex":9,
         "string_noIndex":"z",
         "mapField":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "stringField":"a",
           "intField_noIndex":9,
           "string_noIndex":"z"
@@ -637,12 +609,13 @@ public class SchemaConformingTransformerV2Test {
             .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX)
             .set(TEST_JSON_MAP_EXTRA_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITH_NO_IDX)
             .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, 
TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME,
-            
CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE).set(TEST_JSON_ARRAY_FIELD_NAME,
 TEST_JSON_ARRAY_NODE)
-                .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE)
-                .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
-                .set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE)
-                .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
-                .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITH_NO_IDX));
+                
CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE).set(TEST_JSON_ARRAY_FIELD_NAME,
+                        TEST_JSON_ARRAY_NODE)
+                    .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE)
+                    .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
+                    .set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE)
+                    .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
+                    .set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITH_NO_IDX));
 
     CustomObjectNode expectedJsonNode;
     CustomObjectNode expectedJsonNodeWithMergedTextIndex;
@@ -686,24 +659,20 @@ public class SchemaConformingTransformerV2Test {
       "nestedFields.stringField":"a",
       "mapField":{
         "arrayField":[0,1,2,3],
-        "nullField":null,
         "stringField":"a",
         "intField_noIndex":9,
         "string_noIndex":"z"
       },
       "mapFieldExtra":{
         "arrayField":[0,1,2,3],
-        "nullField":null,
         "stringField":"a",
         "intField_noIndex":9,
         "string_noIndex":"z"
       }
       "indexableExtras":{
-        "nullField":null,
         "stringField":"a",
         "nestedFields":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
         }
       },
       "unindexableExtras":{
@@ -711,7 +680,6 @@ public class SchemaConformingTransformerV2Test {
         "string_noIndex":"z",
         "mapField_noIndex":{
           "arrayField":[0, 1, 2, 3],
-          "nullField":null,
           "stringField":"a",
         },
         "nestedFields":{
@@ -724,24 +692,20 @@ public class SchemaConformingTransformerV2Test {
       ]
     }
     */
-    expectedJsonNode = CustomObjectNode.create()
-        .set(TEST_JSON_ARRAY_FIELD_NAME, N.textNode("[0,1,2,3]"))
-        .set(destColumnName, TEST_JSON_STRING_NODE)
-        .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX)
-        .set(TEST_JSON_MAP_EXTRA_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX)
-        .set(INDEXABLE_EXTRAS_FIELD_NAME,
-            CustomObjectNode.create().set(TEST_JSON_NULL_FIELD_NAME, 
TEST_JSON_NULL_NODE)
-                .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE)
+    expectedJsonNode = 
CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, 
N.textNode("[0,1,2,3]"))
+        .set(destColumnName, 
TEST_JSON_STRING_NODE).set(TEST_JSON_MAP_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITH_NO_IDX)
+        .set(TEST_JSON_MAP_EXTRA_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITH_NO_IDX).set(INDEXABLE_EXTRAS_FIELD_NAME,
+            CustomObjectNode.create().set(TEST_JSON_STRING_FIELD_NAME, 
TEST_JSON_STRING_NODE)
                 .set(TEST_JSON_NESTED_MAP_FIELD_NAME,
-                    CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, 
TEST_JSON_ARRAY_NODE)
-                        .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE)))
+                    CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, 
TEST_JSON_ARRAY_NODE)))
 
         .set(UNINDEXABLE_EXTRAS_FIELD_NAME,
             CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, 
TEST_INT_NODE)
                 .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)
-                .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, 
TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME,
-                CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, 
TEST_INT_NODE)
-                    .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)));
+                .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, 
TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)
+                .set(TEST_JSON_NESTED_MAP_FIELD_NAME,
+                    
CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE)
+                        .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, 
TEST_JSON_STRING_NO_IDX_NODE)));
 
     expectedJsonNodeWithMergedTextIndex = 
expectedJsonNode.deepCopy().set(MERGED_TEXT_INDEX_FIELD_NAME,
         
N.arrayNode().add("0:arrayField").add("1:arrayField").add("2:arrayField").add("3:arrayField")
@@ -749,8 +713,8 @@ public class SchemaConformingTransformerV2Test {
             
.add("0:nestedFields.arrayField").add("1:nestedFields.arrayField").add("2:nestedFields.arrayField")
             .add("3:nestedFields.arrayField").add("a:nestedFields.stringField")
             
.add("[0,1,2,3]:mapFieldExtra.arrayField").add("a:mapFieldExtra.stringField")
-            
.add("0:mapFieldExtra.arrayField").add("1:mapFieldExtra.arrayField")
-            
.add("2:mapFieldExtra.arrayField").add("3:mapFieldExtra.arrayField"));
+            
.add("0:mapFieldExtra.arrayField").add("1:mapFieldExtra.arrayField").add("2:mapFieldExtra.arrayField")
+            .add("3:mapFieldExtra.arrayField"));
     transformKeyValueTransformation(
         schemaBuilder.addMultiValueDimension(MERGED_TEXT_INDEX_FIELD_NAME, 
DataType.STRING).build(), keyMapping,
         pathToDrop, pathToPreserve, pathToPreserveWithIndex, inputJsonNode, 
expectedJsonNodeWithMergedTextIndex);
@@ -777,8 +741,7 @@ public class SchemaConformingTransformerV2Test {
 
   private void testTransform(String indexableExtrasField, String 
unindexableExtrasField, String mergedTextIndexField,
       Schema schema, Map<String, String> keyMapping, Set<String> 
fieldPathsToDrop, Set<String> fieldPathsToPreserve,
-      Set<String> fieldPathsToPreserveWithIndex, String inputRecordJSONString,
-      String expectedOutputRecordJSONString) {
+      Set<String> fieldPathsToPreserveWithIndex, String inputRecordJSONString, 
String expectedOutputRecordJSONString) {
     TableConfig tableConfig =
         createDefaultTableConfig(indexableExtrasField, unindexableExtrasField, 
UNINDEXABLE_FIELD_SUFFIX,
             fieldPathsToDrop, fieldPathsToPreserve, 
fieldPathsToPreserveWithIndex, mergedTextIndexField);
@@ -845,8 +808,8 @@ public class SchemaConformingTransformerV2Test {
       Schema schema = 
createDefaultSchemaBuilder().addSingleValueDimension("a.b", DataType.STRING)
           .addSingleValueDimension("a.b.c", DataType.INT).build();
       SchemaConformingTransformerV2.validateSchema(schema,
-          new SchemaConformingTransformerV2Config(null, 
INDEXABLE_EXTRAS_FIELD_NAME, null, null, null, null, null,
-              null, null, null, null, null, null, null));
+          new SchemaConformingTransformerV2Config(null, 
INDEXABLE_EXTRAS_FIELD_NAME, null, null, null, null, null, null,
+              null, null, null, null, null, null));
     } catch (Exception ex) {
       fail("Should not have thrown any exception when overlapping schema 
occurs");
     }
@@ -895,8 +858,8 @@ public class SchemaConformingTransformerV2Test {
 
     shingleIndexMaxLength = 8;
     shingleIndexOverlapLength = 2;
-    expectedTokenValues = new ArrayList<>(Arrays
-        .asList("0123:key", "2345:key", "4567:key", "6789:key", "89AB:key", 
"ABCD:key", "CDEF:key", "EFGH:key",
+    expectedTokenValues = new ArrayList<>(
+        Arrays.asList("0123:key", "2345:key", "4567:key", "6789:key", 
"89AB:key", "ABCD:key", "CDEF:key", "EFGH:key",
             "GHIJ:key"));
     testShingleIndexWithParams(key, value, shingleIndexMaxLength, 
shingleIndexOverlapLength, expectedTokenValues);
 
@@ -926,8 +889,8 @@ public class SchemaConformingTransformerV2Test {
       Integer shingleIndexOverlapLength, List<String> expectedTokenValues) {
     Map.Entry<String, Object> kv = new AbstractMap.SimpleEntry<>(key, value);
     List<String> shingleIndexTokens = new ArrayList<>();
-    _RECORD_TRANSFORMER
-        .generateShingleTextIndexDocument(kv, shingleIndexTokens, 
shingleIndexMaxLength, shingleIndexOverlapLength);
+    _RECORD_TRANSFORMER.generateShingleTextIndexDocument(kv, 
shingleIndexTokens, shingleIndexMaxLength,
+        shingleIndexOverlapLength);
     int numTokens = shingleIndexTokens.size();
     assertEquals(numTokens, expectedTokenValues.size());
     for (int i = 0; i < numTokens; i++) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to