siddharthteotia commented on code in PR #8993: URL: https://github.com/apache/pinot/pull/8993#discussion_r910687228
########## pinot-core/src/test/java/org/apache/pinot/queries/MultiValueRawQueriesTest.java: ########## @@ -338,6 +338,445 @@ public void testSelectQueries() { } } + @Test + public void testNonAggregateMVGroupBY() { + { + // TODO: Today ORDER BY on MV columns (irrespective of whether it's dictionary based or raw) doesn't work + // Fix ORDER BY only for MV columns + String query = "SELECT mvFloatCol from testTable WHERE mvFloatCol < 5 ORDER BY mvFloatCol LIMIT 10"; + BrokerResponseNative brokerResponseNative = getBrokerResponse(query); + assertEquals(brokerResponseNative.getProcessingExceptions().size(), 2); + } + { + // Test a group by query on some raw MV rows. Order by on SV column added for determinism + String query = "SELECT svIntCol, mvRawFloatCol, mvRawDoubleCol, mvRawStringCol from testTable GROUP BY " + + "svIntCol, mvRawFloatCol, mvRawDoubleCol, mvRawStringCol ORDER BY svIntCol LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{ + "svIntCol", "mvRawFloatCol", "mvRawDoubleCol", "mvRawStringCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.FLOAT, DataSchema.ColumnDataType.DOUBLE, + DataSchema.ColumnDataType.STRING + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 10); + + int[] expectedSVInts = new int[]{0, 0, 0, 0, 0, 0, 0, 0, 1, 1}; + + for (int i = 0; i < 10; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 4); + assertEquals((int) values[0], expectedSVInts[i]); + } + } + { + // Test a group by order by query on some raw MV rows (order by on int and double) + String query = "SELECT mvRawIntCol, mvRawDoubleCol, mvRawStringCol from testTable GROUP BY mvRawIntCol, " + + "mvRawDoubleCol, mvRawStringCol ORDER BY mvRawIntCol, mvRawDoubleCol LIMIT 20"; + ResultTable 
resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{ + "mvRawIntCol", "mvRawDoubleCol", "mvRawStringCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.DOUBLE, DataSchema.ColumnDataType.STRING + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 20); + + int intValue = -1; + double doubleValue = -1; + for (int i = 0; i < 20; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 3); + int intValueCur = (int) values[0]; + double doubleValueCur = (double) values[1]; + assertTrue(intValueCur >= intValue); + if (intValueCur == intValue) { + assertTrue(doubleValueCur >= doubleValue); + } + + intValue = intValueCur; + doubleValue = doubleValueCur; + } + } + { + // Test a group by order by query on some raw MV rows (order by on string) + String query = "SELECT mvRawIntCol, mvRawDoubleCol, mvRawStringCol from testTable GROUP BY mvRawIntCol, " + + "mvRawDoubleCol, mvRawStringCol ORDER BY mvRawStringCol LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{ + "mvRawIntCol", "mvRawDoubleCol", "mvRawStringCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.DOUBLE, DataSchema.ColumnDataType.STRING + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 10); + + String stringVal = null; + for (int i = 0; i < 10; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 3); + String stringValueCur = (String) values[2]; + if (stringVal != null && !stringValueCur.equals(stringVal)) { + assertTrue(stringValueCur.compareTo(stringVal) > 0); + } + stringVal = 
stringValueCur; + } + } + { + // Test a select with a VALUEIN transform function with group by + String query = "SELECT VALUEIN(mvRawIntCol, '0') from testTable WHERE mvRawIntCol IN (0) GROUP BY " + + "VALUEIN(mvRawIntCol, '0') LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{"valuein(mvRawIntCol,'0')"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.INT}); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 1); + Object[] values = recordRows.get(0); + assertEquals(values.length, 1); + int intRawVal = (int) values[0]; + assertEquals(intRawVal, 0); + } + { + // Test a select with a VALUEIN transform function with group by order by + String query = "SELECT VALUEIN(mvRawDoubleCol, '0.0') from testTable WHERE mvRawDoubleCol IN (0.0) GROUP BY " + + "VALUEIN(mvRawDoubleCol, '0.0') ORDER BY VALUEIN(mvRawDoubleCol, '0.0') LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{"valuein(mvRawDoubleCol,'0.0')"}, + new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.DOUBLE}); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 1); + Object[] values = recordRows.get(0); + assertEquals(values.length, 1); + double doubleRawVal = (double) values[0]; + assertEquals(doubleRawVal, 0.0); + } + } + + @Test + public void testSelectWithFilterQueries() { + { + // Test a select with filter query on a MV raw column identifier + String query = "SELECT mvRawIntCol, mvRawDoubleCol, mvRawStringCol from testTable where mvRawIntCol < 5 LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = 
new DataSchema(new String[]{ + "mvRawIntCol", "mvRawDoubleCol", "mvRawStringCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.INT_ARRAY, DataSchema.ColumnDataType.DOUBLE_ARRAY, + DataSchema.ColumnDataType.STRING_ARRAY + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 10); + + for (int i = 0; i < 10; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 3); + int[] intVal = (int[]) values[0]; + assertEquals(intVal[1] - intVal[0], MV_OFFSET); + assertEquals(intVal[0], i % 5); + assertEquals(intVal[1], (i % 5) + MV_OFFSET); + + double[] doubleVal = (double[]) values[1]; + assertEquals(doubleVal[0], (double) i % 5); + assertEquals(doubleVal[1], (double) (i % 5) + MV_OFFSET); + + String[] stringVal = (String[]) values[2]; + assertEquals(Integer.parseInt(stringVal[0]), i % 5); + assertEquals(Integer.parseInt(stringVal[1]), (i % 5) + MV_OFFSET); + } + } + { + // Test a select with filter query (OR) on two MV raw column identifiers (int and double) + String query = "SELECT mvRawIntCol, mvRawDoubleCol, mvRawStringCol from testTable where mvRawIntCol < 5 " + + "OR mvRawDoubleCol > 1104.0 LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{ + "mvRawIntCol", "mvRawDoubleCol", "mvRawStringCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.INT_ARRAY, DataSchema.ColumnDataType.DOUBLE_ARRAY, + DataSchema.ColumnDataType.STRING_ARRAY + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 10); + + for (int i = 0; i < 10; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 3); + int[] intVal = (int[]) values[0]; + assertEquals(intVal[1] - intVal[0], MV_OFFSET); + double[] doubleVal = (double[]) 
values[1]; + assertTrue(intVal[0] < 5 || intVal[1] < 5 || doubleVal[0] > 1104.0 || doubleVal[1] > 1104.0); + } + } + { + // Test a select with filter query on a long MV raw column identifier + String query = "SELECT mvRawLongCol from testTable where mvRawLongCol > 1100 LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{ + "mvRawLongCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.LONG_ARRAY + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 10); + + for (int i = 0; i < 10; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 1); + long[] longVal = (long[]) values[0]; + assertEquals(longVal[1] - longVal[0], MV_OFFSET); + assertTrue(longVal[0] > 1100 || longVal[1] > 1100); + } + } + { + // Test a select with filter = query on a string MV raw column identifier + String query = "SELECT mvRawStringCol from testTable where mvRawStringCol = '1100' LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{ + "mvRawStringCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.STRING_ARRAY + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 4); + + for (int i = 0; i < 4; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 1); + String[] stringVal = (String[]) values[0]; + assertEquals(Integer.parseInt(stringVal[1]) - Integer.parseInt(stringVal[0]), MV_OFFSET); + assertTrue(Integer.parseInt(stringVal[0]) == 1100 || Integer.parseInt(stringVal[1]) == 1100); + } + } + { + // Test a select with filter = query on int, float, long, and double MV raw column identifiers + String query = 
"SELECT mvRawIntCol, mvRawFloatCol, mvRawLongCol, mvRawDoubleCol from testTable where " + + "mvRawIntCol = '1100' AND mvRawFloatCol = '1100.0' AND mvRawLongCol = '1100' AND mvRawDoubleCol = '1100.0' " + + "LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{ + "mvRawIntCol", "mvRawFloatCol", "mvRawLongCol", "mvRawDoubleCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.INT_ARRAY, DataSchema.ColumnDataType.FLOAT_ARRAY, + DataSchema.ColumnDataType.LONG_ARRAY, DataSchema.ColumnDataType.DOUBLE_ARRAY + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 4); + + for (int i = 0; i < 4; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 4); + int[] intVal = (int[]) values[0]; + assertEquals(intVal[1] - intVal[0], MV_OFFSET); + assertTrue(intVal[0] == 1100 || intVal[1] == 1100); + + float[] floatVal = (float[]) values[1]; + assertEquals(floatVal[1] - floatVal[0], (float) MV_OFFSET); + assertTrue(floatVal[0] == 1100.0F || floatVal[1] == 1100.0F); + + long[] longVal = (long[]) values[2]; + assertEquals(longVal[1] - longVal[0], MV_OFFSET); + assertTrue(longVal[0] == 1100L || longVal[1] == 1100L); + + double[] doubleVal = (double[]) values[3]; + assertEquals(doubleVal[1] - doubleVal[0], (double) MV_OFFSET); + assertTrue(doubleVal[0] == 1100.0 || doubleVal[1] == 1100.0); + } + } + { + // Test a select with filter query (AND) on two MV raw column identifiers (one int and another double) such that + // the values in the filter are mutually exclusive + // No match should be found + String query = "SELECT mvRawIntCol, mvRawDoubleCol, mvRawStringCol from testTable where mvRawIntCol < 5 " + + "AND mvRawDoubleCol > 1104.0 LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + 
DataSchema dataSchema = new DataSchema(new String[]{ + "mvRawIntCol", "mvRawDoubleCol", "mvRawStringCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.INT_ARRAY, DataSchema.ColumnDataType.DOUBLE_ARRAY, + DataSchema.ColumnDataType.STRING_ARRAY + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 0); + } + { + // Test a select with filter IN query on int, float, long, and double MV raw column identifiers + String query = "SELECT mvRawIntCol, mvRawFloatCol, mvRawLongCol, mvRawDoubleCol from testTable where " + + "mvRawIntCol IN (1100, 1101) AND mvRawFloatCol IN (1100.0, 1101.0) AND mvRawLongCol " + + "IN (1100, 1101) AND mvRawDoubleCol IN (1100.0, 1101.0) LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{ + "mvRawIntCol", "mvRawFloatCol", "mvRawLongCol", "mvRawDoubleCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.INT_ARRAY, DataSchema.ColumnDataType.FLOAT_ARRAY, + DataSchema.ColumnDataType.LONG_ARRAY, DataSchema.ColumnDataType.DOUBLE_ARRAY + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 8); + + for (int i = 0; i < 4; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 4); + int[] intVal = (int[]) values[0]; + assertEquals(intVal[1] - intVal[0], MV_OFFSET); + assertTrue(intVal[0] == 1100 || intVal[1] == 1100 || intVal[0] == 1101 || intVal[1] == 1101); + + float[] floatVal = (float[]) values[1]; + assertEquals(floatVal[1] - floatVal[0], (float) MV_OFFSET); + assertTrue(floatVal[0] == 1100.0F || floatVal[1] == 1100.0F || floatVal[0] == 1101.0F + || floatVal[1] == 1101.0F); + + long[] longVal = (long[]) values[2]; + assertEquals(longVal[1] - longVal[0], MV_OFFSET); + assertTrue(longVal[0] == 1100L 
|| longVal[1] == 1100L || longVal[0] == 1101L || longVal[1] == 1101L); + + double[] doubleVal = (double[]) values[3]; + assertEquals(doubleVal[1] - doubleVal[0], (double) MV_OFFSET); + assertTrue(doubleVal[0] == 1100.0 || doubleVal[1] == 1100.0 || doubleVal[0] == 1101.0 + || doubleVal[1] == 1101.0); + } + } + { + // Test a select with filter IN query on the string MV raw column identifier + String query = "SELECT mvRawStringCol from testTable where mvRawStringCol IN ('1100', '1101') LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{ + "mvRawStringCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.STRING_ARRAY + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 8); + + for (int i = 0; i < 4; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 1); + String[] stringVal = (String[]) values[0]; + assertEquals(Integer.parseInt(stringVal[1]) - Integer.parseInt(stringVal[0]), MV_OFFSET); + assertTrue(Integer.parseInt(stringVal[0]) == 1100 || Integer.parseInt(stringVal[1]) == 1100 + || Integer.parseInt(stringVal[0]) == 1101 || Integer.parseInt(stringVal[1]) == 1101); + } + } + { + // Test a select with filter query on an arraylength transform function + String query = "SELECT mvRawIntCol, mvRawDoubleCol, mvRawStringCol from testTable where " + + "ARRAYLENGTH(mvRawIntCol) < 5 LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{ + "mvRawIntCol", "mvRawDoubleCol", "mvRawStringCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.INT_ARRAY, DataSchema.ColumnDataType.DOUBLE_ARRAY, + DataSchema.ColumnDataType.STRING_ARRAY + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + 
List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 10); + + for (int i = 0; i < 10; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 3); + int[] intVal = (int[]) values[0]; + assertEquals(intVal[1] - intVal[0], MV_OFFSET); + + double[] doubleVal = (double[]) values[1]; + assertEquals(doubleVal[0], (double) intVal[0]); + assertEquals(doubleVal[1], (double) intVal[1]); + + String[] stringVal = (String[]) values[2]; + assertEquals(Integer.parseInt(stringVal[0]), intVal[0]); + assertEquals(Integer.parseInt(stringVal[1]), intVal[1]); + } + } + { + // Test a select with filter = query on an arraylength transform function + String query = "SELECT mvRawIntCol, mvRawDoubleCol, mvRawStringCol from testTable where " + + "ARRAYLENGTH(mvRawDoubleCol) = 2 LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{ + "mvRawIntCol", "mvRawDoubleCol", "mvRawStringCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.INT_ARRAY, DataSchema.ColumnDataType.DOUBLE_ARRAY, + DataSchema.ColumnDataType.STRING_ARRAY + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 10); + + for (int i = 0; i < 10; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 3); + int[] intVal = (int[]) values[0]; + assertEquals(intVal[1] - intVal[0], MV_OFFSET); + + double[] doubleVal = (double[]) values[1]; + assertEquals(doubleVal[0], (double) intVal[0]); + assertEquals(doubleVal[1], (double) intVal[1]); + + String[] stringVal = (String[]) values[2]; + assertEquals(Integer.parseInt(stringVal[0]), intVal[0]); + assertEquals(Integer.parseInt(stringVal[1]), intVal[1]); + } + } + { + // Test a select with filter IN query on an arraylength transform function + String query = "SELECT mvRawIntCol, mvRawDoubleCol, 
mvRawStringCol from testTable where " + + "ARRAYLENGTH(mvRawStringCol) IN (2, 5) LIMIT 10"; + ResultTable resultTable = getBrokerResponse(query).getResultTable(); + assertNotNull(resultTable); + DataSchema dataSchema = new DataSchema(new String[]{ + "mvRawIntCol", "mvRawDoubleCol", "mvRawStringCol" + }, new DataSchema.ColumnDataType[]{ + DataSchema.ColumnDataType.INT_ARRAY, DataSchema.ColumnDataType.DOUBLE_ARRAY, + DataSchema.ColumnDataType.STRING_ARRAY + }); + assertEquals(resultTable.getDataSchema(), dataSchema); + List<Object[]> recordRows = resultTable.getRows(); + assertEquals(recordRows.size(), 10); + + for (int i = 0; i < 10; i++) { + Object[] values = recordRows.get(i); + assertEquals(values.length, 3); + int[] intVal = (int[]) values[0]; + assertEquals(intVal[1] - intVal[0], MV_OFFSET); + + double[] doubleVal = (double[]) values[1]; + assertEquals(doubleVal[0], (double) intVal[0]); + assertEquals(doubleVal[1], (double) intVal[1]); + + String[] stringVal = (String[]) values[2]; + assertEquals(Integer.parseInt(stringVal[0]), intVal[0]); + assertEquals(Integer.parseInt(stringVal[1]), intVal[1]); + } + } + } + Review Comment: For the next test, testSimpleAggregateQueries, can we add one or two queries that nest a transform function inside the aggregate, if that is not already covered elsewhere? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org