This is an automated email from the ASF dual-hosted git repository.

yupeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
     new f4ae3e0  Add collectionToJsonMode to schema inference (#6946)
f4ae3e0 is described below

commit f4ae3e0a0a46f75bd284a852c936bcb27bab1aa1
Author: Yupeng Fu <yupe...@users.noreply.github.com>
AuthorDate: Thu May 20 10:27:08 2021 -0700

    Add collectionToJsonMode to schema inference (#6946)
---
 .../pinot/plugin/inputformat/avro/AvroUtils.java   | 41 ++++++++++++++++------
 .../plugin/inputformat/avro/AvroUtilsTest.java     | 20 +++++++++--
 .../java/org/apache/pinot/spi/utils/JsonUtils.java | 39 ++++++++++++++------
 .../org/apache/pinot/spi/utils/JsonUtilsTest.java  | 37 +++++++++++++------
 .../admin/command/AvroSchemaToPinotSchema.java     | 17 +++++++--
 .../tools/admin/command/JsonToPinotSchema.java     | 16 +++++++--
 6 files changed, 133 insertions(+), 37 deletions(-)
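The change threads a ComplexTypeConfig.CollectionToJsonMode (NONE, NON_PRIMITIVE or ALL) through schema inference: a collection field that is not unnested is now either dropped, kept as a multi-value column of its primitive element type, or inferred as a single-value STRING column that will hold the JSON-serialized value. For illustration only (not part of this commit), a minimal sketch of the new JsonUtils entry point added below; the class name and sample record are made up, and the expected columns follow the unit tests in this patch.

import java.util.ArrayList;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.utils.JsonUtils;

public class CollectionToJsonModeDemo {
  public static void main(String[] args) throws Exception {
    // Sample record (illustrative): a primitive array "d2" and an array of objects "entries".
    JsonNode node = new ObjectMapper().readTree(
        "{\"d1\": \"v\", \"d2\": [1, 2, 3], \"entries\": [{\"id\": 1, \"description\": \"x\"}]}");

    // NON_PRIMITIVE: "d2" stays a multi-value INT column, "entries" becomes a single-value
    // STRING column holding the JSON-serialized array.
    Schema nonPrimitive = JsonUtils.getPinotSchemaFromJsonNode(node, null, null, new ArrayList<>(), ".",
        ComplexTypeConfig.CollectionToJsonMode.NON_PRIMITIVE);

    // ALL: every collection that is not unnested, "d2" included, becomes a single-value STRING column.
    Schema all = JsonUtils.getPinotSchemaFromJsonNode(node, null, null, new ArrayList<>(), ".",
        ComplexTypeConfig.CollectionToJsonMode.ALL);

    System.out.println(nonPrimitive);
    System.out.println(all);
  }
}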
diff --git a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java
index bbd7a79..ae72c85 100644
--- a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java
+++ b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroUtils.java
@@ -32,6 +32,7 @@ import org.apache.avro.SchemaBuilder;
 import org.apache.avro.file.DataFileStream;
 import org.apache.avro.generic.GenericDatumReader;
 import org.apache.avro.generic.GenericRecord;
+import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
 import org.apache.pinot.spi.data.DateTimeFieldSpec;
 import org.apache.pinot.spi.data.DateTimeFormatSpec;
 import org.apache.pinot.spi.data.DateTimeGranularitySpec;
@@ -81,17 +82,18 @@ public class AvroUtils {
    * @param timeUnit Time unit
    * @param unnestFields the fields to unnest
    * @param delimiter the delimiter to separate components in nested structure
+   * @param collectionToJsonMode the mode of converting collection to JSON
    *
    * @return Pinot schema
    */
   public static Schema getPinotSchemaFromAvroSchemaWithComplexTypeHandling(org.apache.avro.Schema avroSchema,
       @Nullable Map<String, FieldSpec.FieldType> fieldTypeMap, @Nullable TimeUnit timeUnit, List<String> unnestFields,
-      String delimiter) {
+      String delimiter, ComplexTypeConfig.CollectionToJsonMode collectionToJsonMode) {
     Schema pinotSchema = new Schema();
     for (Field field : avroSchema.getFields()) {
       extractSchemaWithComplexTypeHandling(field.schema(), unnestFields, delimiter, field.name(), pinotSchema,
-          fieldTypeMap, timeUnit);
+          fieldTypeMap, timeUnit, collectionToJsonMode);
     }
     return pinotSchema;
   }
@@ -134,18 +136,19 @@ public class AvroUtils {
    * @param complexType if allows complex-type handling
    * @param unnestFields the fields to unnest
    * @param delimiter the delimiter separating components in nested structure
+   * @param collectionToJsonMode the mode of converting collection to JSON string
    * @return Pinot schema
    */
   public static Schema getPinotSchemaFromAvroSchemaFile(File avroSchemaFile,
       @Nullable Map<String, FieldSpec.FieldType> fieldTypeMap, @Nullable TimeUnit timeUnit, boolean complexType,
-      List<String> unnestFields, String delimiter)
+      List<String> unnestFields, String delimiter, ComplexTypeConfig.CollectionToJsonMode collectionToJsonMode)
       throws IOException {
     org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(avroSchemaFile);
     if (!complexType) {
       return getPinotSchemaFromAvroSchema(avroSchema, fieldTypeMap, timeUnit);
     } else {
       return getPinotSchemaFromAvroSchemaWithComplexTypeHandling(avroSchema, fieldTypeMap, timeUnit, unnestFields,
-          delimiter);
+          delimiter, collectionToJsonMode);
     }
   }
@@ -283,7 +286,8 @@ public class AvroUtils {

   private static void extractSchemaWithComplexTypeHandling(org.apache.avro.Schema fieldSchema,
       List<String> unnestFields, String delimiter, String path, Schema pinotSchema,
-      @Nullable Map<String, FieldSpec.FieldType> fieldTypeMap, @Nullable TimeUnit timeUnit) {
+      @Nullable Map<String, FieldSpec.FieldType> fieldTypeMap, @Nullable TimeUnit timeUnit,
+      ComplexTypeConfig.CollectionToJsonMode collectionToJsonMode) {
     org.apache.avro.Schema.Type fieldType = fieldSchema.getType();
     switch (fieldType) {
       case UNION:
@@ -299,7 +303,7 @@ public class AvroUtils {
         }
         if (nonNullSchema != null) {
           extractSchemaWithComplexTypeHandling(nonNullSchema, unnestFields, delimiter, path, pinotSchema, fieldTypeMap,
-              timeUnit);
+              timeUnit, collectionToJsonMode);
         } else {
           throw new IllegalStateException("Cannot find non-null schema in UNION schema");
         }
@@ -307,20 +311,23 @@ public class AvroUtils {
       case RECORD:
         for (Field innerField : fieldSchema.getFields()) {
           extractSchemaWithComplexTypeHandling(innerField.schema(), unnestFields, delimiter,
-              String.join(delimiter, path, innerField.name()), pinotSchema, fieldTypeMap, timeUnit);
+              String.join(delimiter, path, innerField.name()), pinotSchema, fieldTypeMap, timeUnit,
+              collectionToJsonMode);
         }
         break;
       case ARRAY:
         org.apache.avro.Schema elementType = fieldSchema.getElementType();
         if (unnestFields.contains(path)) {
           extractSchemaWithComplexTypeHandling(elementType, unnestFields, delimiter, path, pinotSchema, fieldTypeMap,
-              timeUnit);
-        } else if (AvroSchemaUtil.isPrimitiveType(elementType.getType())) {
+              timeUnit, collectionToJsonMode);
+        } else if (collectionToJsonMode == ComplexTypeConfig.CollectionToJsonMode.NON_PRIMITIVE && AvroSchemaUtil
+            .isPrimitiveType(elementType.getType())) {
           addFieldToPinotSchema(pinotSchema, AvroSchemaUtil.valueOf(elementType.getType()), path, false, fieldTypeMap,
               timeUnit);
-        } else {
+        } else if (shallConvertToJson(collectionToJsonMode, elementType)) {
           addFieldToPinotSchema(pinotSchema, DataType.STRING, path, true, fieldTypeMap, timeUnit);
         }
+        // do not include the node for other cases
         break;
       default:
         DataType dataType = AvroSchemaUtil.valueOf(fieldType);
@@ -328,6 +335,20 @@ public class AvroUtils {
     }
   }

+  private static boolean shallConvertToJson(ComplexTypeConfig.CollectionToJsonMode collectionToJsonMode,
+      org.apache.avro.Schema elementType) {
+    switch (collectionToJsonMode) {
+      case ALL:
+        return true;
+      case NONE:
+        return false;
+      case NON_PRIMITIVE:
+        return !AvroSchemaUtil.isPrimitiveType(elementType.getType());
+      default:
+        throw new IllegalArgumentException(String.format("Unsupported collectionToJsonMode %s", collectionToJsonMode));
+    }
+  }
+
   private static void addFieldToPinotSchema(Schema pinotSchema, DataType dataType, String name,
       boolean isSingleValueField, @Nullable Map<String, FieldSpec.FieldType> fieldTypeMap,
       @Nullable TimeUnit timeUnit) {
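For reference, a minimal sketch (not part of this commit) of calling the extended Avro entry point; the Avro schema literal and class name are illustrative, while the method signature is the one added above.

import java.util.ArrayList;
import org.apache.pinot.plugin.inputformat.avro.AvroUtils;
import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
import org.apache.pinot.spi.data.Schema;

public class AvroInferenceDemo {
  public static void main(String[] args) {
    // Illustrative Avro schema: a string field "d1" and an int-array field "d2".
    org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Rec\",\"fields\":["
            + "{\"name\":\"d1\",\"type\":\"string\"},"
            + "{\"name\":\"d2\",\"type\":{\"type\":\"array\",\"items\":\"int\"}}]}");

    // ALL turns the non-unnested "d2" array into a single-value STRING (JSON) column;
    // NON_PRIMITIVE would keep it as a multi-value INT column, and NONE would drop it.
    Schema pinotSchema = AvroUtils.getPinotSchemaFromAvroSchemaWithComplexTypeHandling(avroSchema, null, null,
        new ArrayList<>(), ".", ComplexTypeConfig.CollectionToJsonMode.ALL);
    System.out.println(pinotSchema);
  }
}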
diff --git a/pinot-plugins/pinot-input-format/pinot-avro-base/src/test/java/org/apache/pinot/plugin/inputformat/avro/AvroUtilsTest.java b/pinot-plugins/pinot-input-format/pinot-avro-base/src/test/java/org/apache/pinot/plugin/inputformat/avro/AvroUtilsTest.java
index 1667511..e0140ea 100644
--- a/pinot-plugins/pinot-input-format/pinot-avro-base/src/test/java/org/apache/pinot/plugin/inputformat/avro/AvroUtilsTest.java
+++ b/pinot-plugins/pinot-input-format/pinot-avro-base/src/test/java/org/apache/pinot/plugin/inputformat/avro/AvroUtilsTest.java
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
+import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
 import org.apache.pinot.spi.data.FieldSpec.FieldType;
@@ -89,7 +90,7 @@ public class AvroUtilsTest {
         .put("hoursSinceEpoch", FieldType.TIME).put("m1", FieldType.METRIC).build();
     Schema inferredPinotSchema = AvroUtils
         .getPinotSchemaFromAvroSchemaWithComplexTypeHandling(avroSchema, fieldSpecMap, TimeUnit.HOURS,
-            new ArrayList<>(), ".");
+            new ArrayList<>(), ".", ComplexTypeConfig.CollectionToJsonMode.NON_PRIMITIVE);
     Schema expectedSchema =
         new Schema.SchemaBuilder().addSingleValueDimension("d1", DataType.STRING).addMetric("m1", DataType.INT)
             .addSingleValueDimension("tuple.streetaddress", DataType.STRING)
@@ -101,7 +102,7 @@ public class AvroUtilsTest {
     // unnest collection entries
     inferredPinotSchema = AvroUtils
         .getPinotSchemaFromAvroSchemaWithComplexTypeHandling(avroSchema, fieldSpecMap, TimeUnit.HOURS,
-            Lists.newArrayList("entries"), ".");
+            Lists.newArrayList("entries"), ".", ComplexTypeConfig.CollectionToJsonMode.NON_PRIMITIVE);
     expectedSchema =
         new Schema.SchemaBuilder().addSingleValueDimension("d1", DataType.STRING).addMetric("m1", DataType.INT)
             .addSingleValueDimension("tuple.streetaddress", DataType.STRING)
@@ -113,7 +114,7 @@ public class AvroUtilsTest {
     // change delimiter
     inferredPinotSchema = AvroUtils
         .getPinotSchemaFromAvroSchemaWithComplexTypeHandling(avroSchema, fieldSpecMap, TimeUnit.HOURS,
-            Lists.newArrayList(), "_");
+            Lists.newArrayList(), "_", ComplexTypeConfig.CollectionToJsonMode.NON_PRIMITIVE);
     expectedSchema =
         new Schema.SchemaBuilder().addSingleValueDimension("d1", DataType.STRING).addMetric("m1", DataType.INT)
             .addSingleValueDimension("tuple_streetaddress", DataType.STRING)
@@ -121,5 +122,18 @@ public class AvroUtilsTest {
             .addMultiValueDimension("d2", DataType.INT)
             .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, "hoursSinceEpoch"), null).build();
     Assert.assertEquals(expectedSchema, inferredPinotSchema);
+
+    // change the handling of collection-to-json option, d2 will become string
+    inferredPinotSchema = AvroUtils
+        .getPinotSchemaFromAvroSchemaWithComplexTypeHandling(avroSchema, fieldSpecMap, TimeUnit.HOURS,
+            Lists.newArrayList("entries"), ".", ComplexTypeConfig.CollectionToJsonMode.ALL);
+    expectedSchema =
+        new Schema.SchemaBuilder().addSingleValueDimension("d1", DataType.STRING).addMetric("m1", DataType.INT)
+            .addSingleValueDimension("tuple.streetaddress", DataType.STRING)
+            .addSingleValueDimension("tuple.city", DataType.STRING).addSingleValueDimension("entries.id", DataType.LONG)
+            .addSingleValueDimension("entries.description", DataType.STRING)
+            .addSingleValueDimension("d2", DataType.STRING)
+            .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.HOURS, "hoursSinceEpoch"), null).build();
+    Assert.assertEquals(expectedSchema, inferredPinotSchema);
   }
 }
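The Avro test resource itself is not shown in this diff; for orientation, the asserted columns imply field shapes roughly like the following. This is a hypothetical fragment, not the actual test schema, built with the Avro SchemaBuilder API.

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;

public class HypotheticalTestSchema {
  public static void main(String[] args) {
    // "entries": array of records -> entries.id (LONG) and entries.description (STRING) when unnested.
    Schema entryType = SchemaBuilder.record("Entry").fields()
        .requiredLong("id").requiredString("description").endRecord();
    // "d2": array of int -> multi-value INT under NON_PRIMITIVE, single-value STRING under ALL.
    Schema rowType = SchemaBuilder.record("Row").fields()
        .name("d2").type().array().items().intType().noDefault()
        .name("entries").type().array().items(entryType).noDefault()
        .endRecord();
    System.out.println(rowType.toString(true));
  }
}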
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
index c427640..ecdd6a9 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
@@ -43,6 +43,7 @@ import java.util.Map;
 import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
 import javax.annotation.Nullable;
+import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
 import org.apache.pinot.spi.data.DateTimeFieldSpec;
 import org.apache.pinot.spi.data.DateTimeFormatSpec;
 import org.apache.pinot.spi.data.DateTimeGranularitySpec;
@@ -409,33 +410,34 @@ public class JsonUtils {

   public static Schema getPinotSchemaFromJsonFile(File jsonFile,
       @Nullable Map<String, FieldSpec.FieldType> fieldTypeMap, @Nullable TimeUnit timeUnit,
-      @Nullable List<String> unnestFields, String delimiter)
+      @Nullable List<String> unnestFields, String delimiter,
+      ComplexTypeConfig.CollectionToJsonMode collectionToJsonMode)
       throws IOException {
     JsonNode jsonNode = fileToFirstJsonNode(jsonFile);
     if (unnestFields == null) {
       unnestFields = new ArrayList<>();
     }
     Preconditions.checkState(jsonNode.isObject(), "the JSON data shall be an object");
-    return getPinotSchemaFromJsonNode(jsonNode, fieldTypeMap, timeUnit, unnestFields, delimiter);
+    return getPinotSchemaFromJsonNode(jsonNode, fieldTypeMap, timeUnit, unnestFields, delimiter, collectionToJsonMode);
   }

   public static Schema getPinotSchemaFromJsonNode(JsonNode jsonNode,
       @Nullable Map<String, FieldSpec.FieldType> fieldTypeMap, @Nullable TimeUnit timeUnit, List<String> unnestFields,
-      String delimiter) {
+      String delimiter, ComplexTypeConfig.CollectionToJsonMode collectionToJsonMode) {
     Schema pinotSchema = new Schema();
     Iterator<Map.Entry<String, JsonNode>> fieldIterator = jsonNode.fields();
     while (fieldIterator.hasNext()) {
       Map.Entry<String, JsonNode> fieldEntry = fieldIterator.next();
       JsonNode childNode = fieldEntry.getValue();
       inferPinotSchemaFromJsonNode(childNode, pinotSchema, fieldEntry.getKey(), fieldTypeMap, timeUnit, unnestFields,
-          delimiter);
+          delimiter, collectionToJsonMode);
     }
     return pinotSchema;
   }

   private static void inferPinotSchemaFromJsonNode(JsonNode jsonNode, Schema pinotSchema, String path,
       @Nullable Map<String, FieldSpec.FieldType> fieldTypeMap, @Nullable TimeUnit timeUnit, List<String> unnestFields,
-      String delimiter) {
+      String delimiter, ComplexTypeConfig.CollectionToJsonMode collectionToJsonMode) {
     if (jsonNode.isNull()) {
       // do nothing
       return;
@@ -451,25 +453,42 @@ public class JsonUtils {
       JsonNode childNode = jsonNode.get(0);
       if (unnestFields.contains(path)) {
-        inferPinotSchemaFromJsonNode(childNode, pinotSchema, path, fieldTypeMap, timeUnit, unnestFields, delimiter);
-      } else if (childNode.isValueNode()) {
-        addFieldToPinotSchema(pinotSchema, valueOf(childNode), path, false, fieldTypeMap, timeUnit);
-      } else {
+        inferPinotSchemaFromJsonNode(childNode, pinotSchema, path, fieldTypeMap, timeUnit, unnestFields, delimiter,
+            collectionToJsonMode);
+      } else if (shallConvertToJson(collectionToJsonMode, childNode)) {
         addFieldToPinotSchema(pinotSchema, DataType.STRING, path, true, fieldTypeMap, timeUnit);
+      } else if (collectionToJsonMode == ComplexTypeConfig.CollectionToJsonMode.NON_PRIMITIVE && childNode
+          .isValueNode()) {
+        addFieldToPinotSchema(pinotSchema, valueOf(childNode), path, false, fieldTypeMap, timeUnit);
       }
+      // do not include the node for other cases
     } else if (jsonNode.isObject()) {
       Iterator<Map.Entry<String, JsonNode>> fieldIterator = jsonNode.fields();
       while (fieldIterator.hasNext()) {
         Map.Entry<String, JsonNode> fieldEntry = fieldIterator.next();
         JsonNode childNode = fieldEntry.getValue();
         inferPinotSchemaFromJsonNode(childNode, pinotSchema, String.join(delimiter, path, fieldEntry.getKey()),
-            fieldTypeMap, timeUnit, unnestFields, delimiter);
+            fieldTypeMap, timeUnit, unnestFields, delimiter, collectionToJsonMode);
       }
     } else {
       throw new IllegalArgumentException(String.format("Unsupported json node type", jsonNode.getClass()));
     }
   }

+  private static boolean shallConvertToJson(ComplexTypeConfig.CollectionToJsonMode collectionToJsonMode,
+      JsonNode childNode) {
+    switch (collectionToJsonMode) {
+      case ALL:
+        return true;
+      case NONE:
+        return false;
+      case NON_PRIMITIVE:
+        return !childNode.isValueNode();
+      default:
+        throw new IllegalArgumentException(String.format("Unsupported collectionToJsonMode %s", collectionToJsonMode));
+    }
+  }
+
   /**
    * Returns the data type stored in Pinot that is associated with the given Avro type.
    */
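A note on precedence that may not be obvious from the hunk above: a path listed in unnestFields is expanded before the mode is consulted, so unnesting still works even with NONE. A minimal sketch under that assumption (the file handling, sample record and class name are illustrative only):

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.utils.JsonUtils;

public class UnnestVsModeDemo {
  public static void main(String[] args) throws Exception {
    // Single-record JSON file (illustrative).
    Path jsonFile = Files.createTempFile("record", ".json");
    Files.write(jsonFile, "{\"d2\": [1, 2, 3], \"entries\": [{\"id\": 1, \"description\": \"x\"}]}".getBytes());

    // "entries" is unnested into entries.id / entries.description even though the mode is NONE;
    // "d2" is dropped because it is neither unnested nor converted by the mode.
    Schema schema = JsonUtils.getPinotSchemaFromJsonFile(jsonFile.toFile(), null, null,
        Collections.singletonList("entries"), ".", ComplexTypeConfig.CollectionToJsonMode.NONE);
    System.out.println(schema);
  }
}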
diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
index 4e47264..4c2167c 100644
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
@@ -27,6 +27,7 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
+import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.Schema;
 import org.testng.Assert;
@@ -273,21 +274,22 @@ public class JsonUtilsTest {
     Map<String, FieldSpec.FieldType> fieldSpecMap =
         new ImmutableMap.Builder<String, FieldSpec.FieldType>().put("d1", FieldSpec.FieldType.DIMENSION)
             .put("hoursSinceEpoch", FieldSpec.FieldType.DATE_TIME).put("m1", FieldSpec.FieldType.METRIC).build();
-    Schema inferredPinotSchema =
-        JsonUtils.getPinotSchemaFromJsonFile(file, fieldSpecMap, TimeUnit.HOURS, new ArrayList<>(), ".");
+    Schema inferredPinotSchema = JsonUtils
+        .getPinotSchemaFromJsonFile(file, fieldSpecMap, TimeUnit.HOURS, new ArrayList<>(), ".",
+            ComplexTypeConfig.CollectionToJsonMode.NON_PRIMITIVE);
     Schema expectedSchema = new Schema.SchemaBuilder().addSingleValueDimension("d1", FieldSpec.DataType.STRING)
         .addMetric("m1", FieldSpec.DataType.INT)
         .addSingleValueDimension("tuple.address.streetaddress", FieldSpec.DataType.STRING)
         .addSingleValueDimension("tuple.address.city", FieldSpec.DataType.STRING)
         .addSingleValueDimension("entries", FieldSpec.DataType.STRING)
         .addMultiValueDimension("d2", FieldSpec.DataType.INT)
-        .addDateTime("hoursSinceEpoch",FieldSpec.DataType.INT, "1:HOURS:EPOCH","1:HOURS")
-        .build();
+        .addDateTime("hoursSinceEpoch", FieldSpec.DataType.INT, "1:HOURS:EPOCH", "1:HOURS").build();
     Assert.assertEquals(inferredPinotSchema, expectedSchema);

     // unnest collection entries
-    inferredPinotSchema =
-        JsonUtils.getPinotSchemaFromJsonFile(file, fieldSpecMap, TimeUnit.HOURS, Lists.newArrayList("entries"), ".");
+    inferredPinotSchema = JsonUtils
+        .getPinotSchemaFromJsonFile(file, fieldSpecMap, TimeUnit.HOURS, Lists.newArrayList("entries"), ".",
+            ComplexTypeConfig.CollectionToJsonMode.NON_PRIMITIVE);
     expectedSchema = new Schema.SchemaBuilder().addSingleValueDimension("d1", FieldSpec.DataType.STRING)
         .addMetric("m1", FieldSpec.DataType.INT)
         .addSingleValueDimension("tuple.address.streetaddress", FieldSpec.DataType.STRING)
@@ -295,19 +297,34 @@ public class JsonUtilsTest {
         .addSingleValueDimension("entries.id", FieldSpec.DataType.INT)
         .addSingleValueDimension("entries.description", FieldSpec.DataType.STRING)
         .addMultiValueDimension("d2", FieldSpec.DataType.INT)
-        .addDateTime("hoursSinceEpoch",FieldSpec.DataType.INT, "1:HOURS:EPOCH","1:HOURS").build();
+        .addDateTime("hoursSinceEpoch", FieldSpec.DataType.INT, "1:HOURS:EPOCH", "1:HOURS").build();
     Assert.assertEquals(inferredPinotSchema, expectedSchema);

     // change delimiter
-    inferredPinotSchema =
-        JsonUtils.getPinotSchemaFromJsonFile(file, fieldSpecMap, TimeUnit.HOURS, Lists.newArrayList(""), "_");
+    inferredPinotSchema = JsonUtils
+        .getPinotSchemaFromJsonFile(file, fieldSpecMap, TimeUnit.HOURS, Lists.newArrayList(""), "_",
+            ComplexTypeConfig.CollectionToJsonMode.NON_PRIMITIVE);
     expectedSchema = new Schema.SchemaBuilder().addSingleValueDimension("d1", FieldSpec.DataType.STRING)
         .addMetric("m1", FieldSpec.DataType.INT)
         .addSingleValueDimension("tuple_address_streetaddress", FieldSpec.DataType.STRING)
         .addSingleValueDimension("tuple_address_city", FieldSpec.DataType.STRING)
         .addSingleValueDimension("entries", FieldSpec.DataType.STRING)
         .addMultiValueDimension("d2", FieldSpec.DataType.INT)
-        .addDateTime("hoursSinceEpoch",FieldSpec.DataType.INT, "1:HOURS:EPOCH","1:HOURS").build();
+        .addDateTime("hoursSinceEpoch", FieldSpec.DataType.INT, "1:HOURS:EPOCH", "1:HOURS").build();
+    Assert.assertEquals(inferredPinotSchema, expectedSchema);
+
+    // change the handling of collection-to-json option, d2 will become string
+    inferredPinotSchema = JsonUtils
+        .getPinotSchemaFromJsonFile(file, fieldSpecMap, TimeUnit.HOURS, Lists.newArrayList("entries"), ".",
+            ComplexTypeConfig.CollectionToJsonMode.ALL);
+    expectedSchema = new Schema.SchemaBuilder().addSingleValueDimension("d1", FieldSpec.DataType.STRING)
+        .addMetric("m1", FieldSpec.DataType.INT)
+        .addSingleValueDimension("tuple.address.streetaddress", FieldSpec.DataType.STRING)
+        .addSingleValueDimension("tuple.address.city", FieldSpec.DataType.STRING)
+        .addSingleValueDimension("entries.id", FieldSpec.DataType.INT)
+        .addSingleValueDimension("entries.description", FieldSpec.DataType.STRING)
+        .addSingleValueDimension("d2", FieldSpec.DataType.STRING)
+        .addDateTime("hoursSinceEpoch", FieldSpec.DataType.INT, "1:HOURS:EPOCH", "1:HOURS").build();
     Assert.assertEquals(inferredPinotSchema, expectedSchema);
   }
 }
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/AvroSchemaToPinotSchema.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/AvroSchemaToPinotSchema.java
index 423fb26..34ea580 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/AvroSchemaToPinotSchema.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/AvroSchemaToPinotSchema.java
@@ -27,6 +27,7 @@ import java.util.Map;
 import java.util.concurrent.TimeUnit;
 import org.apache.pinot.plugin.inputformat.avro.AvroUtils;
 import org.apache.pinot.segment.local.recordtransformer.ComplexTypeTransformer;
+import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.tools.Command;
@@ -76,6 +77,9 @@ public class AvroSchemaToPinotSchema extends AbstractBaseAdminCommand implements
   @Option(name = "-complexType", metaVar = "<boolean>", usage = "allow complex-type handling, default to false")
   boolean _complexType;

+  @Option(name = "-collectionToJsonMode", metaVar = "<string>", usage = "The mode of converting collection to JSON string, can be NONE/NON_PRIMITIVE/ALL")
+  String _collectionToJsonMode;
+
   @SuppressWarnings("FieldCanBeLocal")
   @Option(name = "-help", help = true, aliases = {"-h", "--h", "--help"}, usage = "Print this message.")
   private boolean _help = false;
@@ -93,7 +97,7 @@ public class AvroSchemaToPinotSchema extends AbstractBaseAdminCommand implements
     if (_avroSchemaFile != null) {
       schema = AvroUtils
           .getPinotSchemaFromAvroSchemaFile(new File(_avroSchemaFile), buildFieldTypesMap(), _timeUnit, _complexType,
-              buildUnnestFields(), getDelimiter());
+              buildUnnestFields(), getDelimiter(), getCollectionToJsonMode());
     } else if (_avroDataFile != null) {
       schema = AvroUtils.getPinotSchemaFromAvroDataFile(new File(_avroDataFile), buildFieldTypesMap(), _timeUnit);
     } else {
@@ -132,7 +136,9 @@ public class AvroSchemaToPinotSchema extends AbstractBaseAdminCommand implements
   public String toString() {
     return "AvroSchemaToPinotSchema -avroSchemaFile " + _avroSchemaFile + " -avroDataFile " + _avroDataFile
         + " -outputDir " + _outputDir + " -pinotSchemaName " + _pinotSchemaName + " -dimensions " + _dimensions
-        + " -metrics " + _metrics + " -timeColumnName " + _timeColumnName + " -timeUnit " + _timeUnit;
+        + " -metrics " + _metrics + " -timeColumnName " + _timeColumnName + " -timeUnit " + _timeUnit
+        + " _unnestFields " + _unnestFields + " _delimiter " + _delimiter + " _complexType " + _complexType
+        + " _collectionToJsonMode " + _collectionToJsonMode;
   }

   /**
@@ -169,6 +175,13 @@ public class AvroSchemaToPinotSchema extends AbstractBaseAdminCommand implements
     return unnestFields;
   }

+  private ComplexTypeConfig.CollectionToJsonMode getCollectionToJsonMode() {
+    if (_collectionToJsonMode == null) {
+      return ComplexTypeTransformer.DEFAULT_COLLECTION_TO_JSON_MODE;
+    }
+    return ComplexTypeConfig.CollectionToJsonMode.valueOf(_collectionToJsonMode);
+  }
+
   private String getDelimiter() {
     return _delimiter == null ? ComplexTypeTransformer.DEFAULT_DELIMITER : _delimiter;
   }
diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/JsonToPinotSchema.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/JsonToPinotSchema.java
index eb29477..0a63bfc 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/JsonToPinotSchema.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/JsonToPinotSchema.java
@@ -26,6 +26,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
 import org.apache.pinot.segment.local.recordtransformer.ComplexTypeTransformer;
+import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.Schema;
 import org.apache.pinot.spi.utils.JsonUtils;
@@ -70,6 +71,9 @@ public class JsonToPinotSchema extends AbstractBaseAdminCommand implements Comma
   @Option(name = "-delimiter", metaVar = "<string>", usage = "The delimiter separating components in nested structure, default to dot")
   String _delimiter;

+  @Option(name = "-collectionToJsonMode", metaVar = "<string>", usage = "The mode of converting collection to JSON string, can be NONE/NON_PRIMITIVE/ALL")
+  String _collectionToJsonMode;
+
   @SuppressWarnings("FieldCanBeLocal")
   @Option(name = "-help", help = true, aliases = {"-h", "--h", "--help"}, usage = "Print this message.")
   private boolean _help = false;
@@ -86,7 +90,7 @@ public class JsonToPinotSchema extends AbstractBaseAdminCommand implements Comma
     Schema schema;
     schema = JsonUtils
         .getPinotSchemaFromJsonFile(new File(_jsonFile), buildFieldTypesMap(), _timeUnit, buildUnnestFields(),
-            getDelimiter());
+            getDelimiter(), getCollectionToJsonMode());
     schema.setSchemaName(_pinotSchemaName);

     File outputDir = new File(_outputDir);
@@ -118,7 +122,8 @@ public class JsonToPinotSchema extends AbstractBaseAdminCommand implements Comma
   public String toString() {
     return "JsonToPinotSchema -jsonFile " + _jsonFile + " -outputDir " + _outputDir + " -pinotSchemaName "
         + _pinotSchemaName + " -dimensions " + _dimensions + " -metrics " + _metrics + " -timeColumnName "
-        + _dateTimeColumnName + " -timeUnit " + _timeUnit;
+        + _dateTimeColumnName + " -timeUnit " + _timeUnit + " _unnestFields " + _unnestFields + " _delimiter "
+        + _delimiter + " _collectionToJsonMode " + _collectionToJsonMode;
   }

   /**
@@ -155,6 +160,13 @@ public class JsonToPinotSchema extends AbstractBaseAdminCommand implements Comma
     return unnestFields;
   }

+  private ComplexTypeConfig.CollectionToJsonMode getCollectionToJsonMode() {
+    if (_collectionToJsonMode == null) {
+      return ComplexTypeTransformer.DEFAULT_COLLECTION_TO_JSON_MODE;
+    }
+    return ComplexTypeConfig.CollectionToJsonMode.valueOf(_collectionToJsonMode);
+  }
+
   private String getDelimiter() {
     return _delimiter == null ? ComplexTypeTransformer.DEFAULT_DELIMITER : _delimiter;
   }
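As a usage example for the two admin commands above (assuming the standard bin/pinot-admin.sh launcher, which exposes each command under its class name, and illustrative file paths), the new flag rides along with the existing complex-type options:

  bin/pinot-admin.sh AvroSchemaToPinotSchema -avroSchemaFile /tmp/sample.avsc -outputDir /tmp/schemas \
      -pinotSchemaName mySchema -complexType -collectionToJsonMode ALL

  bin/pinot-admin.sh JsonToPinotSchema -jsonFile /tmp/sample.json -outputDir /tmp/schemas \
      -pinotSchemaName mySchema -collectionToJsonMode NON_PRIMITIVE

When -collectionToJsonMode is omitted, getCollectionToJsonMode() falls back to ComplexTypeTransformer.DEFAULT_COLLECTION_TO_JSON_MODE, as shown in the diffs above.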
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org