This is an automated email from the ASF dual-hosted git repository. siddteotia pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push: new bd4239f JSON column datatype support. (#6878) bd4239f is described below commit bd4239fc6908096f60ead9f1ee2c3576f256618b Author: Amrish Lal <amrish.k....@gmail.com> AuthorDate: Fri May 7 19:52:24 2021 -0700 JSON column datatype support. (#6878) * JSON column datatype support. * code review changes + additional test cases. * code review changes. * Cleanup. * Rebuild. * Adjust for UTC based timestamp. * Adjust for UTC based timestamp. * Adjust for UTC based timestamp. * Cleanup. * code review changes. * Fix test case. * add TODO comment. * add TODO comment. * Rebuild. * Rebuild. --- .../org/apache/pinot/common/utils/DataSchema.java | 9 ++ .../apache/pinot/common/utils/PinotDataType.java | 86 ++++++++++++++++++- .../apache/pinot/common/data/FieldSpecTest.java | 12 +++ .../pinot/common/utils/PinotDataTypeTest.java | 98 ++++++++++++++++------ .../transform/function/BaseTransformFunction.java | 2 + .../transform/function/CastTransformFunction.java | 3 + ...tchPredicateTest.java => JsonDatatypeTest.java} | 58 +++++-------- .../pinot/queries/JsonMatchPredicateTest.java | 3 + .../ColumnMinMaxValueGenerator.java | 8 +- .../recordtransformer/RecordTransformerTest.java | 7 ++ .../java/org/apache/pinot/spi/data/FieldSpec.java | 9 ++ .../java/org/apache/pinot/spi/data/Schema.java | 1 + .../java/org/apache/pinot/spi/utils/JsonUtils.java | 1 + 13 files changed, 230 insertions(+), 67 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java index 409fd80..b871c0e 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java @@ -243,6 +243,7 @@ public class DataSchema { BOOLEAN /* Stored as INT */, TIMESTAMP /* Stored as LONG */, STRING, + JSON /* Stored as STRING */, BYTES, OBJECT, INT_ARRAY, @@ -260,6 +261,8 @@ public class DataSchema { return INT; case TIMESTAMP: return LONG; + case JSON: + return STRING; default: return this; } @@ -308,6 +311,8 @@ public class DataSchema { return DataType.TIMESTAMP; case STRING: return DataType.STRING; + case JSON: + return DataType.JSON; case BYTES: return DataType.BYTES; default: @@ -334,6 +339,7 @@ public class DataSchema { case TIMESTAMP: return new Timestamp((Long) value); case STRING: + case JSON: return value.toString(); case BYTES: return ((ByteArray) value).getBytes(); @@ -421,6 +427,7 @@ public class DataSchema { case TIMESTAMP: return new Timestamp((Long) value).toString(); case STRING: + case JSON: return value.toString(); case BYTES: return ((ByteArray) value).toHexString(); @@ -495,6 +502,8 @@ public class DataSchema { return TIMESTAMP; case STRING: return STRING; + case JSON: + return JSON; case BYTES: return BYTES; default: diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/PinotDataType.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/PinotDataType.java index ee9b02e..43eaf30 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/utils/PinotDataType.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/PinotDataType.java @@ -19,12 +19,14 @@ package org.apache.pinot.common.utils; import java.sql.Timestamp; +import java.util.Base64; import org.apache.commons.lang3.ArrayUtils; import org.apache.pinot.common.utils.DataSchema.ColumnDataType; import org.apache.pinot.spi.data.FieldSpec; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.apache.pinot.spi.utils.BooleanUtils; import org.apache.pinot.spi.utils.BytesUtils; +import org.apache.pinot.spi.utils.JsonUtils; import org.apache.pinot.spi.utils.TimestampUtils; @@ -538,6 +540,60 @@ public enum PinotDataType { } }, + JSON { + @Override + public int toInt(Object value) { + return Integer.parseInt(value.toString().trim()); + } + + @Override + public long toLong(Object value) { + return Long.parseLong(value.toString().trim()); + } + + @Override + public float toFloat(Object value) { + return Float.parseFloat(value.toString()); + } + + @Override + public double toDouble(Object value) { + return Double.parseDouble(value.toString()); + } + + @Override + public boolean toBoolean(Object value) { + return Boolean.parseBoolean(value.toString().trim()); + } + + @Override + public Timestamp toTimestamp(Object value) { + return TimestampUtils.toTimestamp(value.toString().trim()); + } + + @Override + public String toString(Object value) { + return value.toString(); + } + + @Override + public byte[] toBytes(Object value) { + // Base64 encoding is the commonly used mechanism for encoding binary data in JSON documents. Note that + // toJson function converts byte[] into a Base64 encoded json string value. + try { + return Base64.getDecoder().decode(value.toString()); + } catch (Exception e) { + throw new RuntimeException( + "Unable to convert JSON base64 encoded string value to BYTES. Input value: " + value, e); + } + } + + @Override + public String convert(Object value, PinotDataType sourceType) { + return sourceType.toJson(value); + } + }, + BYTES { @Override public int toInt(Object value) { @@ -717,7 +773,8 @@ public enum PinotDataType { OBJECT_ARRAY; /** - * NOTE: override toInt(), toLong(), toFloat(), toDouble(), toBoolean(), toTimestamp(), toString() and toBytes() for single-value types. + * NOTE: override toInt(), toLong(), toFloat(), toDouble(), toBoolean(), toTimestamp(), toString(), and + * toBytes() for single-value types. */ public int toInt(Object value) { @@ -748,6 +805,23 @@ public enum PinotDataType { return getSingleValueType().toString(toObjectArray(value)[0]); } + + public String toJson(Object value) { + if (value instanceof String) { + try { + return JsonUtils.stringToJsonNode((String) value).toString(); + } catch (Exception e) { + throw new RuntimeException("Unable to convert String into JSON. Input value: " + value, e); + } + } else { + try { + return JsonUtils.objectToString(value).toString(); + } catch (Exception e) { + throw new RuntimeException("Unable to convert " + value.getClass().getCanonicalName() + " to JSON. Input value: " + value, e); + } + } + } + public byte[] toBytes(Object value) { return getSingleValueType().toBytes(toObjectArray(value)[0]); } @@ -936,7 +1010,7 @@ public enum PinotDataType { } public Object convert(Object value, PinotDataType sourceType) { - throw new UnsupportedOperationException("Cannot convert value form " + sourceType + " to " + this); + throw new UnsupportedOperationException("Cannot convert value from " + sourceType + " to " + this); } /** @@ -1011,6 +1085,12 @@ public enum PinotDataType { } else { throw new IllegalStateException("There is no multi-value type for TIMESTAMP"); } + case JSON: + if (fieldSpec.isSingleValueField()) { + return PinotDataType.JSON; + } else { + throw new IllegalStateException("There is no multi-value type for JSON"); + } case STRING: return fieldSpec.isSingleValueField() ? PinotDataType.STRING : PinotDataType.STRING_ARRAY; case BYTES: @@ -1045,6 +1125,8 @@ public enum PinotDataType { return TIMESTAMP; case STRING: return STRING; + case JSON: + return JSON; case BYTES: return BYTES; case INT_ARRAY: diff --git a/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java b/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java index aa7d31e..a020ebc 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java @@ -58,6 +58,7 @@ public class FieldSpecTest { Assert.assertEquals(BOOLEAN.getStoredType(), INT); Assert.assertEquals(TIMESTAMP.getStoredType(), LONG); Assert.assertEquals(STRING.getStoredType(), STRING); + Assert.assertEquals(JSON.getStoredType(), STRING); Assert.assertEquals(BYTES.getStoredType(), BYTES); Assert.assertEquals(INT.size(), Integer.BYTES); @@ -106,6 +107,17 @@ public class FieldSpecTest { Assert.assertEquals(fieldSpec1.hashCode(), fieldSpec2.hashCode()); Assert.assertEquals(fieldSpec1.getDefaultNullValue(), "null"); + // Single-value json type dimension field with max length and default null value. + fieldSpec1 = new DimensionFieldSpec(); + fieldSpec1.setName("svDimension"); + fieldSpec1.setDataType(JSON); + fieldSpec1.setMaxLength(20000); + fieldSpec2 = new DimensionFieldSpec("svDimension", JSON, true, 20000, null); + Assert.assertEquals(fieldSpec1, fieldSpec2); + Assert.assertEquals(fieldSpec1.toString(), fieldSpec2.toString()); + Assert.assertEquals(fieldSpec1.hashCode(), fieldSpec2.hashCode()); + Assert.assertEquals(fieldSpec1.getDefaultNullValue(), "null"); + // Multi-value dimension field. fieldSpec1 = new DimensionFieldSpec(); fieldSpec1.setName("mvDimension"); diff --git a/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java b/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java index ca75097..2970b4b 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java @@ -18,6 +18,11 @@ */ package org.apache.pinot.common.utils; +import java.sql.Timestamp; +import java.time.LocalDateTime; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; import org.testng.annotations.Test; import static org.apache.pinot.common.utils.PinotDataType.*; @@ -28,16 +33,16 @@ import static org.testng.Assert.fail; public class PinotDataTypeTest { private static final PinotDataType[] SOURCE_TYPES = - {BYTE, CHARACTER, SHORT, INTEGER, LONG, FLOAT, DOUBLE, STRING, BYTE_ARRAY, CHARACTER_ARRAY, SHORT_ARRAY, INTEGER_ARRAY, LONG_ARRAY, FLOAT_ARRAY, DOUBLE_ARRAY, STRING_ARRAY}; + {BYTE, CHARACTER, SHORT, INTEGER, LONG, FLOAT, DOUBLE, STRING, JSON, BYTE_ARRAY, CHARACTER_ARRAY, SHORT_ARRAY, INTEGER_ARRAY, LONG_ARRAY, FLOAT_ARRAY, DOUBLE_ARRAY, STRING_ARRAY}; private static final Object[] SOURCE_VALUES = - {(byte) 123, (char) 123, (short) 123, 123, 123L, 123f, 123d, " 123", new Object[]{(byte) 123}, new Object[]{(char) 123}, new Object[]{(short) 123}, new Object[]{123}, new Object[]{123L}, new Object[]{123f}, new Object[]{123d}, new Object[]{" 123"}}; + {(byte) 123, (char) 123, (short) 123, 123, 123L, 123f, 123d, " 123", "123 ", new Object[]{(byte) 123}, new Object[]{(char) 123}, new Object[]{(short) 123}, new Object[]{123}, new Object[]{123L}, new Object[]{123f}, new Object[]{123d}, new Object[]{" 123"}}; private static final PinotDataType[] DEST_TYPES = {INTEGER, LONG, FLOAT, DOUBLE, INTEGER_ARRAY, LONG_ARRAY, FLOAT_ARRAY, DOUBLE_ARRAY}; private static final Object[] EXPECTED_DEST_VALUES = {123, 123L, 123f, 123d, new Object[]{123}, new Object[]{123L}, new Object[]{123f}, new Object[]{123d}}; private static final String[] EXPECTED_STRING_VALUES = {Byte.toString((byte) 123), Character.toString((char) 123), Short.toString((short) 123), Integer.toString( - 123), Long.toString(123L), Float.toString(123f), Double.toString(123d), " 123", Byte.toString( + 123), Long.toString(123L), Float.toString(123f), Double.toString(123d), " 123", "123 ", Byte.toString( (byte) 123), Character.toString((char) 123), Short.toString((short) 123), Integer.toString( 123), Long.toString(123L), Float.toString(123f), Double.toString(123d), " 123"}; @@ -76,6 +81,9 @@ public class PinotDataTypeTest { assertEquals(DOUBLE.convert(false, BOOLEAN), 0d); assertEquals(STRING.convert(true, BOOLEAN), "true"); assertEquals(STRING.convert(false, BOOLEAN), "false"); + + assertEquals(BOOLEAN.convert("true", JSON), true); + assertEquals(BOOLEAN.convert("false", JSON), false); } @Test @@ -89,11 +97,30 @@ public class PinotDataTypeTest { assertEquals(STRING.convert(new byte[]{0, 1}, BYTES), "0001"); assertEquals(BYTES.convert("0001", STRING), new byte[]{0, 1}); assertEquals(BYTES.convert(new byte[]{0, 1}, BYTES), new byte[]{0, 1}); + assertEquals(BYTES.convert("AAE=", JSON), new byte[]{0,1}); assertEquals(BYTES.convert(new Byte[]{0, 1}, BYTE_ARRAY), new byte[]{0, 1}); assertEquals(BYTES.convert(new String[]{"0001"}, STRING_ARRAY), new byte[]{0, 1}); } @Test + public void testTimestamp() { + Timestamp timestamp = Timestamp.valueOf(LocalDateTime.now()); + assertEquals(TIMESTAMP.convert(timestamp.getTime(), LONG), timestamp); + assertEquals(TIMESTAMP.convert(timestamp.toString(), STRING), timestamp); + assertEquals(TIMESTAMP.convert(timestamp.getTime(), JSON), timestamp); + assertEquals(TIMESTAMP.convert(timestamp.toString(), JSON), timestamp); + } + + @Test + public void testJSON() { + assertEquals(JSON.convert(false, BOOLEAN), "false"); + assertEquals(JSON.convert(true, BOOLEAN), "true"); + assertEquals(JSON.convert(new byte[]{0, 1}, BYTES), "\"AAE=\""); // Base64 encoding. + assertEquals(JSON.convert("{\"bytes\":\"AAE=\",\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]},\"timestamp\":1620324238610}", STRING), "{\"bytes\":\"AAE=\",\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]},\"timestamp\":1620324238610}"); + assertEquals(JSON.convert(new Timestamp(1620324238610l), TIMESTAMP), "1620324238610"); + } + + @Test public void testObject() { assertEquals(OBJECT.toInt(new NumberObject("123")), 123); assertEquals(OBJECT.toLong(new NumberObject("123")), 123L); @@ -102,43 +129,64 @@ public class PinotDataTypeTest { assertTrue(OBJECT.toBoolean(new NumberObject("123"))); assertEquals(OBJECT.toTimestamp(new NumberObject("123")).getTime(), 123L); assertEquals(OBJECT.toString(new NumberObject("123")), "123"); + assertEquals(OBJECT.toJson(getGenericTestObject()), "{\"bytes\":\"AAE=\",\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]},\"timestamp\":1620324238610}"); assertEquals(OBJECT_ARRAY.getSingleValueType(), OBJECT); } + private static Object getGenericTestObject() { + Map<String, Object> map1 = new HashMap<>(); + map1.put("array", Arrays.asList(-5.4,4, "2")); + map1.put("key1", "value"); + map1.put("key2", null); + + Map<String, Object> map2 = new HashMap<>(); + map2.put("map", map1); + map2.put("bytes", new byte[]{0,1}); + map2.put("timestamp", new Timestamp(1620324238610l)); + + return map2; + } + @Test public void testInvalidConversion() { for (PinotDataType sourceType : values()) { - if (sourceType.isSingleValue() && sourceType != STRING && sourceType != BYTES) { - assertInvalidConversion(sourceType, BYTES); + if (sourceType.isSingleValue() && sourceType != STRING && sourceType != BYTES && sourceType != JSON) { + assertInvalidConversion(null, sourceType, BYTES, UnsupportedOperationException.class); } } - assertInvalidConversion(BYTES, INTEGER); - assertInvalidConversion(BYTES, LONG); - assertInvalidConversion(BYTES, FLOAT); - assertInvalidConversion(BYTES, DOUBLE); - assertInvalidConversion(BYTES, INTEGER_ARRAY); - assertInvalidConversion(BYTES, LONG_ARRAY); - assertInvalidConversion(BYTES, FLOAT_ARRAY); - assertInvalidConversion(BYTES, DOUBLE_ARRAY); + assertInvalidConversion(null, BYTES, INTEGER, UnsupportedOperationException.class); + assertInvalidConversion(null, BYTES, LONG, UnsupportedOperationException.class); + assertInvalidConversion(null, BYTES, FLOAT, UnsupportedOperationException.class); + assertInvalidConversion(null, BYTES, DOUBLE, UnsupportedOperationException.class); + assertInvalidConversion(null, BYTES, INTEGER_ARRAY, UnsupportedOperationException.class); + assertInvalidConversion(null, BYTES, LONG_ARRAY, UnsupportedOperationException.class); + assertInvalidConversion(null, BYTES, FLOAT_ARRAY, UnsupportedOperationException.class); + assertInvalidConversion(null, BYTES, DOUBLE_ARRAY, UnsupportedOperationException.class); for (PinotDataType sourceType : values()) { - assertInvalidConversion(sourceType, BYTE); - assertInvalidConversion(sourceType, CHARACTER); - assertInvalidConversion(sourceType, SHORT); - assertInvalidConversion(sourceType, OBJECT); - assertInvalidConversion(sourceType, BYTE_ARRAY); - assertInvalidConversion(sourceType, CHARACTER_ARRAY); - assertInvalidConversion(sourceType, SHORT_ARRAY); - assertInvalidConversion(sourceType, OBJECT_ARRAY); + assertInvalidConversion(null, sourceType, BYTE, UnsupportedOperationException.class); + assertInvalidConversion(null, sourceType, CHARACTER, UnsupportedOperationException.class); + assertInvalidConversion(null, sourceType, SHORT, UnsupportedOperationException.class); + assertInvalidConversion(null, sourceType, OBJECT, UnsupportedOperationException.class); + assertInvalidConversion(null, sourceType, BYTE_ARRAY, UnsupportedOperationException.class); + assertInvalidConversion(null, sourceType, CHARACTER_ARRAY, UnsupportedOperationException.class); + assertInvalidConversion(null, sourceType, SHORT_ARRAY, UnsupportedOperationException.class); + assertInvalidConversion(null, sourceType, OBJECT_ARRAY, UnsupportedOperationException.class); } + + assertInvalidConversion("xyz", STRING, JSON, RuntimeException.class); + } - private void assertInvalidConversion(PinotDataType sourceType, PinotDataType destType) { + private void assertInvalidConversion(Object value, PinotDataType sourceType, PinotDataType destType, + Class expectedExceptionType) { try { - destType.convert(null, sourceType); - } catch (UnsupportedOperationException e) { - return; + destType.convert(value, sourceType); + } catch (Exception e) { + if (e.getClass().equals(expectedExceptionType)) { + return; + } } fail(); } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java index b504202..66e9111 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java @@ -47,6 +47,8 @@ public abstract class BaseTransformFunction implements TransformFunction { new TransformResultMetadata(DataType.STRING, true, false); protected static final TransformResultMetadata STRING_MV_NO_DICTIONARY_METADATA = new TransformResultMetadata(DataType.STRING, false, false); + protected static final TransformResultMetadata JSON_SV_NO_DICTIONARY_METADATA = + new TransformResultMetadata(DataType.JSON, true, false); protected static final TransformResultMetadata BYTES_SV_NO_DICTIONARY_METADATA = new TransformResultMetadata(DataType.BYTES, true, false); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/CastTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/CastTransformFunction.java index 7d7f96c..8428249 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/CastTransformFunction.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/CastTransformFunction.java @@ -76,6 +76,9 @@ public class CastTransformFunction extends BaseTransformFunction { case "VARCHAR": _resultMetadata = STRING_SV_NO_DICTIONARY_METADATA; break; + case "JSON": + _resultMetadata = JSON_SV_NO_DICTIONARY_METADATA; + break; default: throw new IllegalArgumentException("Unable to cast expression to type - " + targetType); } diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/JsonDatatypeTest.java similarity index 84% copy from pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java copy to pinot-core/src/test/java/org/apache/pinot/queries/JsonDatatypeTest.java index c85fe86..75a0537 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/JsonDatatypeTest.java @@ -27,6 +27,7 @@ import java.util.HashSet; import java.util.Iterator; import java.util.List; import org.apache.commons.io.FileUtils; +import org.apache.pinot.common.utils.DataSchema; import org.apache.pinot.core.common.Operator; import org.apache.pinot.core.operator.blocks.IntermediateResultsBlock; import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader; @@ -50,9 +51,9 @@ import org.testng.annotations.Test; /** - * Test cases verifying evaluation of predicate with expressions that contain numerical values of different types. + * Test cases verifying query evaluation against column of type JSON. */ -public class JsonMatchPredicateTest extends BaseQueriesTest { +public class JsonDatatypeTest extends BaseQueriesTest { private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "JsonMatchPredicateTest"); private static final String RAW_TABLE_NAME = "testTable"; private static final String SEGMENT_NAME = "testSegment"; @@ -63,7 +64,7 @@ public class JsonMatchPredicateTest extends BaseQueriesTest { private static final String STRING_COLUMN = "stringColumn"; private static final Schema SCHEMA = new Schema.SchemaBuilder().addSingleValueDimension(INT_COLUMN, FieldSpec.DataType.INT) - .addSingleValueDimension(JSON_COLUMN, FieldSpec.DataType.STRING) + .addSingleValueDimension(JSON_COLUMN, FieldSpec.DataType.JSON) .addSingleValueDimension(STRING_COLUMN, FieldSpec.DataType.STRING).build(); private static final TableConfig TABLE_CONFIG = new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME).build(); @@ -143,18 +144,34 @@ public class JsonMatchPredicateTest extends BaseQueriesTest { _indexSegments = Arrays.asList(immutableSegment, immutableSegment); } + /** Verify result column type of a simple select query against JSON column */ + @Test + public void testSimpleSelectOnJsonColumn() { + try { + Operator operator = getOperatorForSqlQuery("select jsonColumn FROM testTable"); + IntermediateResultsBlock block = (IntermediateResultsBlock) operator.nextBlock(); + Collection<Object[]> rows = block.getSelectionResult(); + Assert.assertEquals(rows.size(), 9); + Assert.assertEquals(block.getDataSchema().getColumnDataType(0), DataSchema.ColumnDataType.JSON); + } catch (IllegalStateException ise) { + Assert.assertTrue(true); + } + } + /** Test filtering on string value associated with JSON key*/ @Test public void testExtractScalarWithStringFilter() { Operator operator = getOperatorForSqlQuery( - "select json_extract_scalar(jsonColumn, '$.name.last', 'STRING') FROM testTable WHERE json_extract_scalar(jsonColumn, '$.name.first', 'STRING') = 'daffy'"); + "select intColumn, json_extract_scalar(jsonColumn, '$.name.last', 'STRING') FROM testTable WHERE json_extract_scalar(jsonColumn, '$.name.first', 'STRING') = 'daffy'"); IntermediateResultsBlock block = (IntermediateResultsBlock) operator.nextBlock(); Collection<Object[]> rows = block.getSelectionResult(); Assert.assertEquals(rows.size(), 1); Iterator<Object[]> iterator = rows.iterator(); Assert.assertTrue(iterator.hasNext()); - Assert.assertEquals(iterator.next()[0], "duck"); + Object[] row = iterator.next(); + Assert.assertEquals(row[0], 1); + Assert.assertEquals(row[1], "duck"); } /** Test filtering on number value associated with JSON key*/ @@ -282,37 +299,6 @@ public class JsonMatchPredicateTest extends BaseQueriesTest { Assert.assertEquals(iterator.next()[0], "goofy"); } - /** Evaluate json_extract_scalar over string column that does not contain valid json data. */ - @Test - public void testJsonExtractScalarAgainstInvalidJson() { - // json_extract_scalar throws exception since we are trying to parse a non-JSON string. - Operator operator1 = getOperatorForSqlQuery( - "select count(*) FROM testTable WHERE json_extract_scalar(stringColumn, '$.name.first', 'INT') = 0"); - try { - IntermediateResultsBlock block1 = (IntermediateResultsBlock) operator1.nextBlock(); - Assert.fail("Expected query to fail with Exception."); - } catch (RuntimeException re) { - // JSON parsing exception expected. - } - - // JSON data is stored in columns of type STRING, so there is nothing preventing the column from storing bad json - // string. Bad JSON string in columns will cause json_extract_scalar to throw an exception which would terminate - // query processing. However, when json_extract_scalar is used within the WHERE clause, we should return the - // default value instead of throwing exception. This will allow the predicate to be evaluated to either true or - // false and hence allow the query to complete successfully. Returning default value from json_extract_scalar is - // an undocumented feature. Ideally, json_extract_scalar should return NULL when it encounters bad JSON. However, - // NULL support is currently pending, so this is the best we can do. - Operator operator2 = getOperatorForSqlQuery( - "select count(*) FROM testTable WHERE json_extract_scalar(stringColumn, '$.name.first', 'INT', 0) = 0"); - - IntermediateResultsBlock block2 = (IntermediateResultsBlock) operator2.nextBlock(); - Collection<Object[]> rows = block2.getSelectionResult(); - - // None of the values in stringColumn are valid JSON. Hence, json_extract_scalar should default to '0' for all rows - // and count returned by the query should be 9 (same as number of rows in the table). - Assert.assertEquals(block2.getAggregationResult().get(0), 9L); - } - @AfterClass public void tearDown() throws IOException { diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java index c85fe86..9325a2c 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java @@ -51,6 +51,9 @@ import org.testng.annotations.Test; /** * Test cases verifying evaluation of predicate with expressions that contain numerical values of different types. + * TODO: Update these test cases to: 1) use V2 JSON_MATCH function, 2) use multi-dimensional JSON array addressing, + * 3) do json_extract_scalar on a column other than the JSON_MATCH column, 4) query deeper levels of nesting, and + * 5) add test cases for GROUP BY on json_extract_scalar or path expressions. */ public class JsonMatchPredicateTest extends BaseQueriesTest { private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "JsonMatchPredicateTest"); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java index d3bca59..8549893 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java @@ -64,7 +64,7 @@ public class ColumnMinMaxValueGenerator { Set<String> columnsToAddMinMaxValue = new HashSet<>(schema.getPhysicalColumnNames()); // mode ALL - use all columns - // mode NON_METRIC - use all dimensions and time columns + // mode NON_METRIC - use all dimensions and time columns // mode TIME - use only time columns switch (_columnMinMaxValueGeneratorMode) { case TIME: @@ -89,7 +89,7 @@ public class ColumnMinMaxValueGenerator { } PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, ColumnIndexType.DICTIONARY); - DataType dataType = columnMetadata.getDataType(); + DataType dataType = columnMetadata.getDataType().getStoredType(); int length = columnMetadata.getCardinality(); switch (dataType) { case INT: @@ -146,8 +146,8 @@ public class ColumnMinMaxValueGenerator { private void saveMetadata() throws Exception { if (_minMaxValueAdded) { - // Commons Configuration 1.10 does not support file path containing '%'. - // Explicitly providing the output stream for the file bypasses the problem. + // Commons Configuration 1.10 does not support file path containing '%'. + // Explicitly providing the output stream for the file bypasses the problem. try (FileOutputStream fileOutputStream = new FileOutputStream(_segmentProperties.getFile())) { _segmentProperties.save(fileOutputStream); } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java index 0b5d7ee..eb8b031 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java @@ -43,6 +43,7 @@ public class RecordTransformerTest { .addSingleValueDimension("svFloat", DataType.FLOAT).addSingleValueDimension("svDouble", DataType.DOUBLE) .addSingleValueDimension("svBoolean", DataType.BOOLEAN).addSingleValueDimension("svTimestamp", DataType.TIMESTAMP) .addSingleValueDimension("svBytes", DataType.BYTES).addMultiValueDimension("mvInt", DataType.INT) + .addSingleValueDimension("svJson", DataType.JSON) .addMultiValueDimension("mvLong", DataType.LONG).addMultiValueDimension("mvFloat", DataType.FLOAT) .addMultiValueDimension("mvDouble", DataType.DOUBLE) // For sanitation @@ -70,6 +71,7 @@ public class RecordTransformerTest { record.putValue("svBoolean", "true"); record.putValue("svTimestamp", "2020-02-02 22:22:22.222"); record.putValue("svBytes", "7b7b"/*new byte[]{123, 123}*/); + record.putValue("svJson", "{\"first\": \"daffy\", \"last\": \"duck\"}"); record.putValue("mvInt", new Object[]{123L}); record.putValue("mvLong", Collections.singletonList(123f)); record.putValue("mvFloat", new Double[]{123d}); @@ -141,6 +143,7 @@ public class RecordTransformerTest { assertEquals(record.getValue("svBoolean"), 1); assertEquals(record.getValue("svTimestamp"), Timestamp.valueOf("2020-02-02 22:22:22.222").getTime()); assertEquals(record.getValue("svBytes"), new byte[]{123, 123}); + assertEquals(record.getValue("svJson"), "{\"first\":\"daffy\",\"last\":\"duck\"}"); assertEquals(record.getValue("mvInt"), new Object[]{123}); assertEquals(record.getValue("mvLong"), new Object[]{123L}); assertEquals(record.getValue("mvFloat"), new Object[]{123f}); @@ -190,6 +193,7 @@ public class RecordTransformerTest { assertEquals(record.getValue("svBoolean"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN); assertEquals(record.getValue("svTimestamp"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP); assertEquals(record.getValue("svBytes"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES); + assertEquals(record.getValue("svJson"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON); assertEquals(record.getValue("mvInt"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT}); assertEquals(record.getValue("mvLong"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG}); assertEquals(record.getValue("mvFloat"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT}); @@ -210,6 +214,7 @@ public class RecordTransformerTest { assertTrue(record.isNullValue("svBoolean")); assertTrue(record.isNullValue("svTimestamp")); assertTrue(record.isNullValue("svBytes")); + assertTrue(record.isNullValue("svJson")); assertTrue(record.isNullValue("mvInt")); assertTrue(record.isNullValue("mvLong")); assertTrue(record.isNullValue("mvDouble")); @@ -234,6 +239,7 @@ public class RecordTransformerTest { assertEquals(record.getValue("svDouble"), 123d); assertEquals(record.getValue("svBoolean"), 1); assertEquals(record.getValue("svTimestamp"), Timestamp.valueOf("2020-02-02 22:22:22.222").getTime()); + assertEquals(record.getValue("svJson"),"{\"first\":\"daffy\",\"last\":\"duck\"}"); assertEquals(record.getValue("svBytes"), new byte[]{123, 123}); assertEquals(record.getValue("mvInt"), new Object[]{123}); assertEquals(record.getValue("mvLong"), new Object[]{123L}); @@ -259,6 +265,7 @@ public class RecordTransformerTest { assertEquals(record.getValue("svBoolean"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN); assertEquals(record.getValue("svTimestamp"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP); assertEquals(record.getValue("svBytes"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES); + assertEquals(record.getValue("svJson"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON); assertEquals(record.getValue("mvInt"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT}); assertEquals(record.getValue("mvLong"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG}); assertEquals(record.getValue("mvFloat"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT}); diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java index 46196ff..f77facb 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java @@ -52,6 +52,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable { public static final Integer DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN = 0; public static final Long DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP = 0L; public static final String DEFAULT_DIMENSION_NULL_VALUE_OF_STRING = "null"; + public static final String DEFAULT_DIMENSION_NULL_VALUE_OF_JSON = "null"; public static final byte[] DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES = new byte[0]; public static final Integer DEFAULT_METRIC_NULL_VALUE_OF_INT = 0; @@ -229,6 +230,8 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable { return DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP; case STRING: return DEFAULT_DIMENSION_NULL_VALUE_OF_STRING; + case JSON: + return DEFAULT_DIMENSION_NULL_VALUE_OF_JSON; case BYTES: return DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES; default: @@ -303,6 +306,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable { jsonNode.put(key, new Timestamp((Long) _defaultNullValue).toString()); break; case STRING: + case JSON: jsonNode.put(key, (String) _defaultNullValue); break; case BYTES: @@ -375,6 +379,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable { BOOLEAN /* Stored as INT */, TIMESTAMP /* Stored as LONG */, STRING, + JSON /* Stored as STRING */, BYTES, STRUCT, MAP, @@ -392,6 +397,8 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable { return INT; case TIMESTAMP: return LONG; + case JSON: + return STRING; default: return this; } @@ -451,6 +458,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable { case TIMESTAMP: return TimestampUtils.toMillisSinceEpoch(value); case STRING: + case JSON: return value; case BYTES: return BytesUtils.toBytes(value); @@ -481,6 +489,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable { case TIMESTAMP: return TimestampUtils.toMillisSinceEpoch(value); case STRING: + case JSON: return value; case BYTES: return BytesUtils.toByteArray(value); diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java index ebbce81..40b55d1 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java @@ -443,6 +443,7 @@ public final class Schema implements Serializable { case BOOLEAN: case TIMESTAMP: case STRING: + case JSON: case BYTES: break; default: diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java index 4077a53..a3e16a0 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java @@ -193,6 +193,7 @@ public class JsonUtils { return jsonValue.asBoolean(); case TIMESTAMP: case STRING: + case JSON: return jsonValue.asText(); case BYTES: try { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org