This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push: new 3eb0f9c Use sorted index based filtering only for dictionary encoded column (#6288) 3eb0f9c is described below commit 3eb0f9c9daf3e176d41775af515eeb982ff2daeb Author: Sidd <siddharthteo...@gmail.com> AuthorDate: Mon Nov 30 12:36:50 2020 -0800 Use sorted index based filtering only for dictionary encoded column (#6288) * Use sorted index based filtering only for sorted column with dictionary * Address review comments Co-authored-by: Siddharth Teotia <steo...@steotia-mn1.linkedin.biz> --- .../core/operator/filter/FilterOperatorUtils.java | 10 +++++-- .../RangePredicateWithSortedInvertedIndexTest.java | 32 ++++++++++++++++++++-- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java index b497031..a0b7cae 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java @@ -47,9 +47,15 @@ public class FilterOperatorUtils { return new MatchAllFilterOperator(numDocs); } + // Currently sorted index based filtering is supported only for + // dictionary encoded columns. The on-disk segment metadata + // will indicate if the column is sorted or not regardless of + // whether it is raw or dictionary encoded. Here when creating + // the filter operator, we need to make sure that sort filter + // operator is used only if the column is sorted and has dictionary. 
Predicate.Type predicateType = predicateEvaluator.getPredicateType(); if (predicateType == Predicate.Type.RANGE) { - if (dataSource.getDataSourceMetadata().isSorted()) { + if (dataSource.getDataSourceMetadata().isSorted() && dataSource.getDictionary() != null) { return new SortedIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs); } if (dataSource.getRangeIndex() != null) { @@ -59,7 +65,7 @@ public class FilterOperatorUtils { } else if (predicateType == Predicate.Type.REGEXP_LIKE) { return new ScanBasedFilterOperator(predicateEvaluator, dataSource, numDocs); } else { - if (dataSource.getDataSourceMetadata().isSorted()) { + if (dataSource.getDataSourceMetadata().isSorted() && dataSource.getDictionary() != null) { return new SortedIndexBasedFilterOperator(predicateEvaluator, dataSource, numDocs); } if (dataSource.getInvertedIndex() != null) { diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/RangePredicateWithSortedInvertedIndexTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/RangePredicateWithSortedInvertedIndexTest.java index cd97723..9539969 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/RangePredicateWithSortedInvertedIndexTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/RangePredicateWithSortedInvertedIndexTest.java @@ -59,8 +59,9 @@ public class RangePredicateWithSortedInvertedIndexTest extends BaseQueriesTest { private static final String SEGMENT_NAME = "testSegment"; private static final String D1 = "STRING_COL"; - private static final String M1 = "INT_COL"; // sorted column + private static final String M1 = "INT_COL"; // sorted column (dictionary encoded) private static final String M2 = "LONG_COL"; + private static final String M3 = "INT_COL_RAW"; // sorted raw column private static final int NUM_ROWS = 30000; private static final int INT_BASE_VALUE = 0; @@ -117,13 +118,17 @@ public class RangePredicateWithSortedInvertedIndexTest extends BaseQueriesTest { row.putValue(M1, 
INT_BASE_VALUE + rowIndex); _longValues[rowIndex] = RANDOM.nextLong(); row.putValue(M2, _longValues[rowIndex]); + row.putValue(M3, INT_BASE_VALUE + rowIndex); rows.add(row); } - TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build(); + TableConfig tableConfig = + new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setNoDictionaryColumns(Arrays.asList(M3)) + .build(); Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).addSingleValueDimension(D1, FieldSpec.DataType.STRING) - .addMetric(M1, FieldSpec.DataType.INT).addMetric(M2, FieldSpec.DataType.LONG).build(); + .addMetric(M1, FieldSpec.DataType.INT).addMetric(M2, FieldSpec.DataType.LONG) + .addMetric(M3, FieldSpec.DataType.INT).build(); SegmentGeneratorConfig config = new SegmentGeneratorConfig(tableConfig, schema); config.setOutDir(INDEX_DIR.getPath()); config.setTableName(TABLE_NAME); @@ -142,14 +147,35 @@ public class RangePredicateWithSortedInvertedIndexTest extends BaseQueriesTest { Pairs.IntPair pair = new Pairs.IntPair(20000, 29999); runQuery(query, 10000, Lists.newArrayList(pair), 2); + // test with sorted column without dictionary + // FilterOperatorUtils code should correctly create scan operator for INT_COL_RAW + // else this test will fail + query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL >= 20000 AND INT_COL_RAW >= 20000 LIMIT 100000"; + pair = new Pairs.IntPair(20000, 29999); + runQuery(query, 10000, Lists.newArrayList(pair), 2); + query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL >= 20000 AND INT_COL <= 23666 LIMIT 100000"; pair = new Pairs.IntPair(20000, 23666); runQuery(query, 3667, Lists.newArrayList(pair), 2); + // test with sorted column without dictionary + // FilterOperatorUtils code should correctly create scan operator for INT_COL_RAW + // else this test will fail + query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL >= 20000 AND INT_COL <= 23666 AND INT_COL_RAW <= 23666 
LIMIT 100000"; + pair = new Pairs.IntPair(20000, 23666); + runQuery(query, 3667, Lists.newArrayList(pair), 2); + query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL <= 20000 LIMIT 100000"; pair = new Pairs.IntPair(0, 20000); runQuery(query, 20001, Lists.newArrayList(pair), 2); + // test with sorted column without dictionary + // FilterOperatorUtils code should correctly create scan operator for INT_COL_RAW + // else this test will fail + query = "SELECT STRING_COL, INT_COL FROM testTable WHERE INT_COL_RAW = 20000 LIMIT 100000"; + pair = new Pairs.IntPair(20000, 20000); + runQuery(query, 1, Lists.newArrayList(pair), 2); + String filter = "WHERE (INT_COL >= 15000 AND INT_COL <= 16665) OR (INT_COL >= 18000 AND INT_COL <= 19887)"; query = "SELECT STRING_COL, INT_COL FROM testTable " + filter + " LIMIT 100000"; pair = new Pairs.IntPair(15000, 16665); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org