This is an automated email from the ASF dual-hosted git repository. tingchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 447c518712 Remove emitting null value fields during data transformation for SchemaConformingTransformer (#14351) 447c518712 is described below commit 447c5187129b830d819eac4db956e791674875bf Author: Ting Chen <tingc...@uber.com> AuthorDate: Tue Nov 12 10:37:25 2024 -0800 Remove emitting null value fields during data transformation for SchemaConformingTransformer (#14351) * Remove emitting null value fields during data transformation. * Fix lint issues. * Revise based on comments --- .../SchemaConformingTransformerV2.java | 7 +- .../SchemaConformingTransformerV2Test.java | 215 +++++++++------------ 2 files changed, 94 insertions(+), 128 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2.java index 923b49625c..2aed00f0c3 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2.java @@ -92,7 +92,6 @@ import org.slf4j.LoggerFactory; * <pre> * { * "a": 1, - * "c": null, * "c.d": 3, * "json_data": { * "b": "2", @@ -400,6 +399,9 @@ public class SchemaConformingTransformerV2 implements RecordTransformer { SchemaTreeNode currentNode = parentNode == null ? null : parentNode.getChild(key); String unindexableFieldSuffix = _transformerConfig.getUnindexableFieldSuffix(); isIndexable = isIndexable && (null == unindexableFieldSuffix || !key.endsWith(unindexableFieldSuffix)); + if (value == null) { + return extraFieldsContainer; + } if (!(value instanceof Map)) { // leaf node if (!isIndexable) { @@ -413,7 +415,8 @@ public class SchemaConformingTransformerV2 implements RecordTransformer { } mergedTextIndexMap.put(keyJsonPath, value); } else { - // Out of schema + // The field is not mapped to one of the dedicated columns in the Pinot table schema. Thus it will be put + // into the extraField column of the table. if (storeIndexableExtras) { extraFieldsContainer.addIndexableEntry(key, value); mergedTextIndexMap.put(keyJsonPath, value); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2Test.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2Test.java index 6ea6d66cf9..d004f703f6 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2Test.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/SchemaConformingTransformerV2Test.java @@ -80,17 +80,22 @@ public class SchemaConformingTransformerV2Test { private static final CustomObjectNode TEST_JSON_MAP_NODE = CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE).set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE); + private static final CustomObjectNode TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD = + CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) + .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE); + private static final CustomObjectNode TEST_JSON_MAP_NO_IDX_NODE = CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE); private static final CustomObjectNode TEST_JSON_MAP_NODE_WITH_NO_IDX = CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) - .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE).set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) - .set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) + .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE).set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE); + static { ServerMetrics.register(mock(ServerMetrics.class)); } + private static final SchemaConformingTransformerV2 _RECORD_TRANSFORMER = new SchemaConformingTransformerV2(createDefaultBasicTableConfig(), createDefaultSchema()); @@ -131,20 +136,16 @@ public class SchemaConformingTransformerV2Test { /* { "arrayField" : [ 0, 1, 2, 3 ], - "nullField" : null, "stringField" : "a", "mapField" : { "arrayField" : [ 0, 1, 2, 3 ], - "nullField" : null, "stringField" : "a" }, "nestedField" : { "arrayField" : [ 0, 1, 2, 3 ], - "nullField" : null, "stringField" : "a", "mapField" : { "arrayField" : [ 0, 1, 2, 3 ], - "nullField" : null, "stringField" : "a" } } @@ -163,20 +164,16 @@ public class SchemaConformingTransformerV2Test { { "json_data" : { "arrayField" : [ 0, 1, 2, 3 ], - "nullField" : null, "stringField" : "a", "mapField" : { "arrayField" : [ 0, 1, 2, 3 ], - "nullField" : null, "stringField" : "a" }, "nestedField" : { "arrayField" : [ 0, 1, 2, 3 ], - "nullField" : null, "stringField" : "a", "mapField" : { "arrayField" : [ 0, 1, 2, 3 ], - "nullField" : null, "stringField" : "a" } } @@ -184,7 +181,14 @@ public class SchemaConformingTransformerV2Test { } */ schema = createDefaultSchemaBuilder().build(); - expectedJsonNode = CustomObjectNode.create().set(INDEXABLE_EXTRAS_FIELD_NAME, inputJsonNode); + // The input json node stripped of null fields. + final CustomObjectNode inputJsonNodeWithoutNullFields = + CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD) + .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD).set(TEST_JSON_NESTED_MAP_FIELD_NAME, + CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD) + .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD)); + + expectedJsonNode = CustomObjectNode.create().set(INDEXABLE_EXTRAS_FIELD_NAME, inputJsonNodeWithoutNullFields); transformWithIndexableFields(schema, inputJsonNode, expectedJsonNode); // Three dedicated columns in schema, only two are populated, one ignored @@ -195,17 +199,13 @@ public class SchemaConformingTransformerV2Test { "<indexableExtras>":{ "mapField": { "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a" }, - "nullField":null, "stringField":"a", "nestedFields":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "mapField":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a" } } @@ -218,33 +218,29 @@ public class SchemaConformingTransformerV2Test { .build(); expectedJsonNode = CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) - - .set(INDEXABLE_EXTRAS_FIELD_NAME, CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE) - .setAll(TEST_JSON_MAP_NODE.deepCopy().removeAndReturn(TEST_JSON_ARRAY_FIELD_NAME)) - .set(TEST_JSON_NESTED_MAP_FIELD_NAME, CustomObjectNode.create() - .setAll(TEST_JSON_MAP_NODE.deepCopy().removeAndReturn(TEST_JSON_STRING_FIELD_NAME)) - .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE))); + .set(INDEXABLE_EXTRAS_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD) + .setAll(TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD.deepCopy().removeAndReturn(TEST_JSON_ARRAY_FIELD_NAME)) + .set(TEST_JSON_NESTED_MAP_FIELD_NAME, CustomObjectNode.create().setAll( + TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD.deepCopy().removeAndReturn(TEST_JSON_STRING_FIELD_NAME)) + .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD))); transformWithIndexableFields(schema, inputJsonNode, expectedJsonNode); // 8 dedicated columns, only 6 are populated /* { "arrayField" : [ 0, 1, 2, 3 ], - "nullField" : null, "stringField" : "a", "nestedField.arrayField" : [ 0, 1, 2, 3 ], - "nestedField.nullField" : null, "nestedField.stringField" : "a", "json_data" : { "mapField" : { "arrayField" : [ 0, 1, 2, 3 ], - "nullField" : null, "stringField" : "a" }, "nestedField" : { "mapField" : { "arrayField" : [ 0, 1, 2, 3 ], - "nullField" : null, "stringField" : "a" } } @@ -260,13 +256,13 @@ public class SchemaConformingTransformerV2Test { .addSingleValueDimension(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + TEST_JSON_STRING_FIELD_NAME, DataType.STRING) .addSingleValueDimension(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + TEST_JSON_MAP_FIELD_NAME, DataType.JSON) .build(); - expectedJsonNode = CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE) + expectedJsonNode = CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD) .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) - .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE) .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) - .set(INDEXABLE_EXTRAS_FIELD_NAME, CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE) - .set(TEST_JSON_NESTED_MAP_FIELD_NAME, - CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE))); + .set(INDEXABLE_EXTRAS_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD) + .set(TEST_JSON_NESTED_MAP_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD))); transformWithIndexableFields(schema, inputJsonNode, expectedJsonNode); } @@ -275,31 +271,26 @@ public class SchemaConformingTransformerV2Test { /* { "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", "intField_noIndex":9, "string_noIndex":"z", "mapField":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", "intField_noIndex":9, "string_noIndex":"z" }, "mapField_noIndex":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", }, "nestedFields":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", "intField_noIndex":9, "string_noIndex":"z", "mapField":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", "intField_noIndex":9, "string_noIndex":"z" @@ -314,12 +305,13 @@ public class SchemaConformingTransformerV2Test { .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX) .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME, - CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE).set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) - .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE) - .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) - .set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) - .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) - .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX)); + CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE).set(TEST_JSON_ARRAY_FIELD_NAME, + TEST_JSON_ARRAY_NODE) + .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE) + .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) + .set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) + .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) + .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX)); CustomObjectNode expectedJsonNode; CustomObjectNode expectedJsonNodeWithMergedTextIndex; @@ -331,20 +323,16 @@ public class SchemaConformingTransformerV2Test { { "indexableExtras":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", "mapField":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a" }, "nestedFields":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", "mapField":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a" } } @@ -358,7 +346,6 @@ public class SchemaConformingTransformerV2Test { }, "mapField_noIndex":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", }, "nestedFields":{ @@ -376,22 +363,22 @@ public class SchemaConformingTransformerV2Test { } */ expectedJsonNode = CustomObjectNode.create().set(INDEXABLE_EXTRAS_FIELD_NAME, - CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) - .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE).set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) - .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME, CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) - .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE) .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) - .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE))) - + .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD) + .set(TEST_JSON_NESTED_MAP_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) + .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) + .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD))) .set(UNINDEXABLE_EXTRAS_FIELD_NAME, CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NO_IDX_NODE) - .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME, - CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) - .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) - .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NO_IDX_NODE))); + .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD) + .set(TEST_JSON_NESTED_MAP_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) + .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) + .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NO_IDX_NODE))); transformWithUnIndexableFieldsAndMergedTextIndex(schemaBuilder.build(), inputJsonNode, expectedJsonNode); expectedJsonNodeWithMergedTextIndex = expectedJsonNode.deepCopy().set(MERGED_TEXT_INDEX_FIELD_NAME, @@ -418,19 +405,15 @@ public class SchemaConformingTransformerV2Test { "arrayField":[0, 1, 2, 3], "nestedFields.stringField":"a", "indexableExtras":{ - "nullField":null, "stringField":"a", "mapField":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a" }, "nestedFields":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "mapField":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a" } } @@ -444,7 +427,6 @@ public class SchemaConformingTransformerV2Test { }, "mapField_noIndex":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", }, "nestedFields":{ @@ -463,21 +445,22 @@ public class SchemaConformingTransformerV2Test { */ expectedJsonNode = CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) - .set(INDEXABLE_EXTRAS_FIELD_NAME, CustomObjectNode.create().set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE) - .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE).set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE) - .set(TEST_JSON_NESTED_MAP_FIELD_NAME, - CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) - .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE) - .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE))) + .set(INDEXABLE_EXTRAS_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) + .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD) + .set(TEST_JSON_NESTED_MAP_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) + .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD))) .set(UNINDEXABLE_EXTRAS_FIELD_NAME, CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NO_IDX_NODE) - .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME, - CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) - .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) - .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NO_IDX_NODE))); + .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD) + .set(TEST_JSON_NESTED_MAP_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) + .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) + .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NO_IDX_NODE))); transformWithUnIndexableFieldsAndMergedTextIndex(schemaBuilder.build(), inputJsonNode, expectedJsonNode); expectedJsonNodeWithMergedTextIndex = expectedJsonNode.deepCopy().set(MERGED_TEXT_INDEX_FIELD_NAME, @@ -506,21 +489,17 @@ public class SchemaConformingTransformerV2Test { /* { "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", "nestedFields.arrayField":[0, 1, 2, 3], - "nestedFields.nullField":null, "nestedFields.stringField":"a", "indexableExtras":{ "mapField":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a" }, "nestedFields":{ mapField":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a" } } @@ -534,7 +513,6 @@ public class SchemaConformingTransformerV2Test { }, "mapField_noIndex":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", }, "nestedFields":{ @@ -552,23 +530,23 @@ public class SchemaConformingTransformerV2Test { } */ expectedJsonNode = CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) - .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE).set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) + .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) - .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE) .set(TEST_JSON_NESTED_MAP_FIELD_NAME + "." + TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) - - .set(INDEXABLE_EXTRAS_FIELD_NAME, CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE) - .set(TEST_JSON_NESTED_MAP_FIELD_NAME, - CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE))) + .set(INDEXABLE_EXTRAS_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD) + .set(TEST_JSON_NESTED_MAP_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD))) .set(UNINDEXABLE_EXTRAS_FIELD_NAME, CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NO_IDX_NODE) - .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME, - CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) - .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) - .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NO_IDX_NODE))); + .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD) + .set(TEST_JSON_NESTED_MAP_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) + .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) + .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NO_IDX_NODE))); transformWithUnIndexableFieldsAndMergedTextIndex(schemaBuilder.build(), inputJsonNode, expectedJsonNode); expectedJsonNodeWithMergedTextIndex = expectedJsonNode.deepCopy().set(MERGED_TEXT_INDEX_FIELD_NAME, N.arrayNode().add("[0,1,2,3]:arrayField").add("0:arrayField").add("1:arrayField").add("2:arrayField") @@ -590,38 +568,32 @@ public class SchemaConformingTransformerV2Test { /* { "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", "intField_noIndex":9, "string_noIndex":"z", "mapField":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", "intField_noIndex":9, "string_noIndex":"z" }, "mapFieldExtra":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", "intField_noIndex":9, "string_noIndex":"z" }, "mapField_noIndex":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", }, "nestedFields":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", "intField_noIndex":9, "string_noIndex":"z", "mapField":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", "intField_noIndex":9, "string_noIndex":"z" @@ -637,12 +609,13 @@ public class SchemaConformingTransformerV2Test { .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX) .set(TEST_JSON_MAP_EXTRA_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX) .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME, - CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE).set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) - .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE) - .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) - .set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) - .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) - .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX)); + CustomObjectNode.create().setAll(TEST_JSON_MAP_NODE).set(TEST_JSON_ARRAY_FIELD_NAME, + TEST_JSON_ARRAY_NODE) + .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE) + .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) + .set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) + .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) + .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX)); CustomObjectNode expectedJsonNode; CustomObjectNode expectedJsonNodeWithMergedTextIndex; @@ -686,24 +659,20 @@ public class SchemaConformingTransformerV2Test { "nestedFields.stringField":"a", "mapField":{ "arrayField":[0,1,2,3], - "nullField":null, "stringField":"a", "intField_noIndex":9, "string_noIndex":"z" }, "mapFieldExtra":{ "arrayField":[0,1,2,3], - "nullField":null, "stringField":"a", "intField_noIndex":9, "string_noIndex":"z" } "indexableExtras":{ - "nullField":null, "stringField":"a", "nestedFields":{ "arrayField":[0, 1, 2, 3], - "nullField":null, } }, "unindexableExtras":{ @@ -711,7 +680,6 @@ public class SchemaConformingTransformerV2Test { "string_noIndex":"z", "mapField_noIndex":{ "arrayField":[0, 1, 2, 3], - "nullField":null, "stringField":"a", }, "nestedFields":{ @@ -724,24 +692,20 @@ public class SchemaConformingTransformerV2Test { ] } */ - expectedJsonNode = CustomObjectNode.create() - .set(TEST_JSON_ARRAY_FIELD_NAME, N.textNode("[0,1,2,3]")) - .set(destColumnName, TEST_JSON_STRING_NODE) - .set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX) - .set(TEST_JSON_MAP_EXTRA_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX) - .set(INDEXABLE_EXTRAS_FIELD_NAME, - CustomObjectNode.create().set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE) - .set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) + expectedJsonNode = CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, N.textNode("[0,1,2,3]")) + .set(destColumnName, TEST_JSON_STRING_NODE).set(TEST_JSON_MAP_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX) + .set(TEST_JSON_MAP_EXTRA_FIELD_NAME, TEST_JSON_MAP_NODE_WITH_NO_IDX).set(INDEXABLE_EXTRAS_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_STRING_FIELD_NAME, TEST_JSON_STRING_NODE) .set(TEST_JSON_NESTED_MAP_FIELD_NAME, - CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE) - .set(TEST_JSON_NULL_FIELD_NAME, TEST_JSON_NULL_NODE))) + CustomObjectNode.create().set(TEST_JSON_ARRAY_FIELD_NAME, TEST_JSON_ARRAY_NODE))) .set(UNINDEXABLE_EXTRAS_FIELD_NAME, CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE) - .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, TEST_JSON_MAP_NODE).set(TEST_JSON_NESTED_MAP_FIELD_NAME, - CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) - .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE))); + .set(TEST_JSON_MAP_NO_IDX_FIELD_NAME, TEST_JSON_MAP_NODE_WITHOUT_NULL_FIELD) + .set(TEST_JSON_NESTED_MAP_FIELD_NAME, + CustomObjectNode.create().set(TEST_JSON_INT_NO_IDX_FIELD_NAME, TEST_INT_NODE) + .set(TEST_JSON_STRING_NO_IDX_FIELD_NAME, TEST_JSON_STRING_NO_IDX_NODE))); expectedJsonNodeWithMergedTextIndex = expectedJsonNode.deepCopy().set(MERGED_TEXT_INDEX_FIELD_NAME, N.arrayNode().add("0:arrayField").add("1:arrayField").add("2:arrayField").add("3:arrayField") @@ -749,8 +713,8 @@ public class SchemaConformingTransformerV2Test { .add("0:nestedFields.arrayField").add("1:nestedFields.arrayField").add("2:nestedFields.arrayField") .add("3:nestedFields.arrayField").add("a:nestedFields.stringField") .add("[0,1,2,3]:mapFieldExtra.arrayField").add("a:mapFieldExtra.stringField") - .add("0:mapFieldExtra.arrayField").add("1:mapFieldExtra.arrayField") - .add("2:mapFieldExtra.arrayField").add("3:mapFieldExtra.arrayField")); + .add("0:mapFieldExtra.arrayField").add("1:mapFieldExtra.arrayField").add("2:mapFieldExtra.arrayField") + .add("3:mapFieldExtra.arrayField")); transformKeyValueTransformation( schemaBuilder.addMultiValueDimension(MERGED_TEXT_INDEX_FIELD_NAME, DataType.STRING).build(), keyMapping, pathToDrop, pathToPreserve, pathToPreserveWithIndex, inputJsonNode, expectedJsonNodeWithMergedTextIndex); @@ -777,8 +741,7 @@ public class SchemaConformingTransformerV2Test { private void testTransform(String indexableExtrasField, String unindexableExtrasField, String mergedTextIndexField, Schema schema, Map<String, String> keyMapping, Set<String> fieldPathsToDrop, Set<String> fieldPathsToPreserve, - Set<String> fieldPathsToPreserveWithIndex, String inputRecordJSONString, - String expectedOutputRecordJSONString) { + Set<String> fieldPathsToPreserveWithIndex, String inputRecordJSONString, String expectedOutputRecordJSONString) { TableConfig tableConfig = createDefaultTableConfig(indexableExtrasField, unindexableExtrasField, UNINDEXABLE_FIELD_SUFFIX, fieldPathsToDrop, fieldPathsToPreserve, fieldPathsToPreserveWithIndex, mergedTextIndexField); @@ -845,8 +808,8 @@ public class SchemaConformingTransformerV2Test { Schema schema = createDefaultSchemaBuilder().addSingleValueDimension("a.b", DataType.STRING) .addSingleValueDimension("a.b.c", DataType.INT).build(); SchemaConformingTransformerV2.validateSchema(schema, - new SchemaConformingTransformerV2Config(null, INDEXABLE_EXTRAS_FIELD_NAME, null, null, null, null, null, - null, null, null, null, null, null, null)); + new SchemaConformingTransformerV2Config(null, INDEXABLE_EXTRAS_FIELD_NAME, null, null, null, null, null, null, + null, null, null, null, null, null)); } catch (Exception ex) { fail("Should not have thrown any exception when overlapping schema occurs"); } @@ -895,8 +858,8 @@ public class SchemaConformingTransformerV2Test { shingleIndexMaxLength = 8; shingleIndexOverlapLength = 2; - expectedTokenValues = new ArrayList<>(Arrays - .asList("0123:key", "2345:key", "4567:key", "6789:key", "89AB:key", "ABCD:key", "CDEF:key", "EFGH:key", + expectedTokenValues = new ArrayList<>( + Arrays.asList("0123:key", "2345:key", "4567:key", "6789:key", "89AB:key", "ABCD:key", "CDEF:key", "EFGH:key", "GHIJ:key")); testShingleIndexWithParams(key, value, shingleIndexMaxLength, shingleIndexOverlapLength, expectedTokenValues); @@ -926,8 +889,8 @@ public class SchemaConformingTransformerV2Test { Integer shingleIndexOverlapLength, List<String> expectedTokenValues) { Map.Entry<String, Object> kv = new AbstractMap.SimpleEntry<>(key, value); List<String> shingleIndexTokens = new ArrayList<>(); - _RECORD_TRANSFORMER - .generateShingleTextIndexDocument(kv, shingleIndexTokens, shingleIndexMaxLength, shingleIndexOverlapLength); + _RECORD_TRANSFORMER.generateShingleTextIndexDocument(kv, shingleIndexTokens, shingleIndexMaxLength, + shingleIndexOverlapLength); int numTokens = shingleIndexTokens.size(); assertEquals(numTokens, expectedTokenValues.size()); for (int i = 0; i < numTokens; i++) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org