This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 0aa643d00b Vectorize predicates over full col filter scans (#8764) 0aa643d00b is described below commit 0aa643d00bf9eaed1d45ff97546952df2d0cb44e Author: Richard Startin <rich...@startree.ai> AuthorDate: Tue May 24 18:39:34 2022 +0200 Vectorize predicates over full col filter scans (#8764) * vectorize full column predicate scans * benchmark --- .../dociditerators/SVScanDocIdIterator.java | 36 ++++++++++++++++++---- .../org/apache/pinot/perf/BenchmarkQueries.java | 7 +++-- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java index d462b3b6ed..236b6f17fd 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java @@ -41,6 +41,9 @@ public final class SVScanDocIdIterator implements ScanBasedDocIdIterator { private final ForwardIndexReaderContext _readerContext; private final int _numDocs; private final ValueMatcher _valueMatcher; + private final int[] _batch = new int[OPTIMAL_ITERATOR_BATCH_SIZE]; + private int _firstMismatch; + private int _cursor; private int _nextDocId = 0; private long _numEntriesScanned = 0L; @@ -55,6 +58,33 @@ public final class SVScanDocIdIterator implements ScanBasedDocIdIterator { @Override public int next() { + if (_cursor >= _firstMismatch) { + int limit; + int batchSize = 0; + do { + limit = Math.min(_numDocs - _nextDocId, OPTIMAL_ITERATOR_BATCH_SIZE); + if (limit > 0) { + for (int i = 0; i < limit; i++) { + _batch[i] = _nextDocId + i; + } + batchSize = _valueMatcher.matchValues(limit, _batch); + _nextDocId += limit; + _numEntriesScanned += limit; + } + } while (limit > 0 & batchSize == 0); + _firstMismatch = batchSize; 
+ _cursor = 0; + if (_firstMismatch == 0) { + return Constants.EOF; + } + } + return _batch[_cursor++]; + } + + @Override + public int advance(int targetDocId) { + _nextDocId = targetDocId; + _firstMismatch = 0; while (_nextDocId < _numDocs) { int nextDocId = _nextDocId++; _numEntriesScanned++; @@ -65,12 +95,6 @@ public final class SVScanDocIdIterator implements ScanBasedDocIdIterator { return Constants.EOF; } - @Override - public int advance(int targetDocId) { - _nextDocId = targetDocId; - return next(); - } - @Override public MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds) { if (docIds.isEmpty()) { diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkQueries.java b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkQueries.java index 32696caf5f..6d1c4aab23 100644 --- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkQueries.java +++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkQueries.java @@ -163,6 +163,9 @@ public class BenchmarkQueries extends BaseQueriesTest { + "WHERE INT_COL = 0 and SORTED_COL = 1" + "GROUP BY INT_COL, SORTED_COL ORDER BY SORTED_COL, INT_COL ASC"; + public static final String FILTERING_SCAN_QUERY = "SELECT SUM(RAW_INT_COL) FROM MyTable " + + "WHERE RAW_INT_COL BETWEEN 1 AND 10"; + @Param("1500000") private int _numRows; @Param({"EXP(0.001)", "EXP(0.5)", "EXP(0.999)"}) @@ -172,7 +175,7 @@ public class BenchmarkQueries extends BaseQueriesTest { SUM_QUERY, NO_INDEX_LIKE_QUERY, MULTI_GROUP_BY_ORDER_BY, MULTI_GROUP_BY_ORDER_BY_LOW_HIGH, TIME_GROUP_BY, RAW_COLUMN_SUMMARY_STATS, COUNT_OVER_BITMAP_INDEX_IN, COUNT_OVER_BITMAP_INDEXES, COUNT_OVER_BITMAP_AND_SORTED_INDEXES, COUNT_OVER_BITMAP_INDEX_EQUALS, STARTREE_SUM_QUERY, STARTREE_FILTER_QUERY, - FILTERING_BITMAP_SCAN_QUERY + FILTERING_BITMAP_SCAN_QUERY, FILTERING_SCAN_QUERY }) String _query; private IndexSegment _indexSegment; @@ -246,7 +249,7 @@ public class BenchmarkQueries extends BaseQueriesTest { .setSortedColumn(SORTED_COL_NAME) 
.setStarTreeIndexConfigs(Collections.singletonList(new StarTreeIndexConfig( Arrays.asList(SORTED_COL_NAME, INT_COL_NAME), null, Collections.singletonList( - new AggregationFunctionColumnPair(AggregationFunctionType.SUM, RAW_INT_COL_NAME).toColumnName()), + new AggregationFunctionColumnPair(AggregationFunctionType.SUM, RAW_INT_COL_NAME).toColumnName()), Integer.MAX_VALUE))) .build(); Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org