This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 0aa643d00b Vectorize predicates over full col filter scans (#8764)
0aa643d00b is described below

commit 0aa643d00bf9eaed1d45ff97546952df2d0cb44e
Author: Richard Startin <rich...@startree.ai>
AuthorDate: Tue May 24 18:39:34 2022 +0200

    Vectorize predicates over full col filter scans (#8764)
    
    * vectorize full column predicate scans
    
    * benchmark
---
 .../dociditerators/SVScanDocIdIterator.java        | 36 ++++++++++++++++++----
 .../org/apache/pinot/perf/BenchmarkQueries.java    |  7 +++--
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
index d462b3b6ed..236b6f17fd 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/SVScanDocIdIterator.java
@@ -41,6 +41,9 @@ public final class SVScanDocIdIterator implements ScanBasedDocIdIterator {
   private final ForwardIndexReaderContext _readerContext;
   private final int _numDocs;
   private final ValueMatcher _valueMatcher;
+  private final int[] _batch = new int[OPTIMAL_ITERATOR_BATCH_SIZE];
+  private int _firstMismatch;
+  private int _cursor;
 
   private int _nextDocId = 0;
   private long _numEntriesScanned = 0L;
@@ -55,6 +58,33 @@ public final class SVScanDocIdIterator implements ScanBasedDocIdIterator {
 
   @Override
   public int next() {
+    if (_cursor >= _firstMismatch) {
+      int limit;
+      int batchSize = 0;
+      do {
+        limit = Math.min(_numDocs - _nextDocId, OPTIMAL_ITERATOR_BATCH_SIZE);
+        if (limit > 0) {
+          for (int i = 0; i < limit; i++) {
+            _batch[i] = _nextDocId + i;
+          }
+          batchSize = _valueMatcher.matchValues(limit, _batch);
+          _nextDocId += limit;
+          _numEntriesScanned += limit;
+        }
+      } while (limit > 0 & batchSize == 0);
+      _firstMismatch = batchSize;
+      _cursor = 0;
+      if (_firstMismatch == 0) {
+        return Constants.EOF;
+      }
+    }
+    return _batch[_cursor++];
+  }
+
+  @Override
+  public int advance(int targetDocId) {
+    _nextDocId = targetDocId;
+    _firstMismatch = 0;
     while (_nextDocId < _numDocs) {
       int nextDocId = _nextDocId++;
       _numEntriesScanned++;
@@ -65,12 +95,6 @@ public final class SVScanDocIdIterator implements ScanBasedDocIdIterator {
     return Constants.EOF;
   }
 
-  @Override
-  public int advance(int targetDocId) {
-    _nextDocId = targetDocId;
-    return next();
-  }
-
   @Override
   public MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds) {
     if (docIds.isEmpty()) {
diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkQueries.java b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkQueries.java
index 32696caf5f..6d1c4aab23 100644
--- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkQueries.java
+++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkQueries.java
@@ -163,6 +163,9 @@ public class BenchmarkQueries extends BaseQueriesTest {
       + "WHERE INT_COL = 0 and SORTED_COL = 1"
       + "GROUP BY INT_COL, SORTED_COL ORDER BY SORTED_COL, INT_COL ASC";
 
+  public static final String FILTERING_SCAN_QUERY = "SELECT SUM(RAW_INT_COL) FROM MyTable "
+      + "WHERE RAW_INT_COL BETWEEN 1 AND 10";
+
   @Param("1500000")
   private int _numRows;
   @Param({"EXP(0.001)", "EXP(0.5)", "EXP(0.999)"})
@@ -172,7 +175,7 @@ public class BenchmarkQueries extends BaseQueriesTest {
       SUM_QUERY, NO_INDEX_LIKE_QUERY, MULTI_GROUP_BY_ORDER_BY, MULTI_GROUP_BY_ORDER_BY_LOW_HIGH, TIME_GROUP_BY,
       RAW_COLUMN_SUMMARY_STATS, COUNT_OVER_BITMAP_INDEX_IN, COUNT_OVER_BITMAP_INDEXES,
       COUNT_OVER_BITMAP_AND_SORTED_INDEXES, COUNT_OVER_BITMAP_INDEX_EQUALS, STARTREE_SUM_QUERY, STARTREE_FILTER_QUERY,
-      FILTERING_BITMAP_SCAN_QUERY
+      FILTERING_BITMAP_SCAN_QUERY, FILTERING_SCAN_QUERY
   })
   String _query;
   private IndexSegment _indexSegment;
@@ -246,7 +249,7 @@ public class BenchmarkQueries extends BaseQueriesTest {
         .setSortedColumn(SORTED_COL_NAME)
         .setStarTreeIndexConfigs(Collections.singletonList(new StarTreeIndexConfig(
             Arrays.asList(SORTED_COL_NAME, INT_COL_NAME), null, Collections.singletonList(
-                new AggregationFunctionColumnPair(AggregationFunctionType.SUM, RAW_INT_COL_NAME).toColumnName()),
+            new AggregationFunctionColumnPair(AggregationFunctionType.SUM, RAW_INT_COL_NAME).toColumnName()),
             Integer.MAX_VALUE)))
         .build();
     Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to