This is an automated email from the ASF dual-hosted git repository.

kishoreg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new af90f79  Improved range queries (#7513)
af90f79 is described below

commit af90f7972802f03b211c2c22becd8a838c46854b
Author: Richard Startin <rich...@startree.ai>
AuthorDate: Tue Oct 5 17:08:40 2021 +0100

    Improved range queries (#7513)
---
 LICENSE-binary                                     |  4 +--
 .../org/apache/pinot/perf/BenchmarkRangeIndex.java | 15 +++++----
 .../index/column/PhysicalColumnIndexContainer.java |  2 +-
 .../index/readers/BitSlicedRangeIndexReader.java   | 39 ++++++++++++++++------
 .../index/creator/BitSlicedIndexCreatorTest.java   | 16 ++++++---
 pom.xml                                            |  2 +-
 6 files changed, 54 insertions(+), 24 deletions(-)

diff --git a/LICENSE-binary b/LICENSE-binary
index 08ee41e..c8c9569 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -379,8 +379,8 @@ org.eclipse.jetty:jetty-util:9.4.39.v20210325
 org.javassist:javassist:3.19.0-GA
 org.lz4:lz4-java:1.7.1
 org.quartz-scheduler:quartz:2.3.2
-org.roaringbitmap:RoaringBitmap:0.9.21
-org.roaringbitmap:shims:0.9.21
+org.roaringbitmap:RoaringBitmap:0.9.22
+org.roaringbitmap:shims:0.9.22
 org.webjars:swagger-ui:3.18.2
 org.wildfly.openssl:wildfly-openssl:1.0.7.Final
 org.xerial.larray:larray-buffer:0.4.1
diff --git 
a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkRangeIndex.java 
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkRangeIndex.java
index ae02607..25079ca 100644
--- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkRangeIndex.java
+++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkRangeIndex.java
@@ -196,7 +196,7 @@ public class BenchmarkRangeIndex {
     @TearDown(Level.Trial)
     public void tearDown()
         throws IOException {
-      FileUtils.forceDelete(_indexDir);
+      FileUtils.deleteQuietly(_indexDir);
     }
 
     protected Comparable<?> max() {
@@ -378,19 +378,22 @@ public class BenchmarkRangeIndex {
     public void setup()
         throws IOException {
       super.setup();
-      _reader = new BitSlicedRangeIndexReader(_buffer);
+      _reader = new BitSlicedRangeIndexReader(_buffer, metadata());
     }
 
-    @Override
-    protected RawValueBasedInvertedIndexCreator newCreator() {
-      ColumnMetadata metadata = new ColumnMetadataImpl.Builder()
+    private ColumnMetadata metadata() {
+      return new ColumnMetadataImpl.Builder()
           .setFieldSpec(_fieldSpec)
           .setTotalDocs(_numDocs)
           .setHasDictionary(false)
           .setMaxValue(max())
           .setMinValue(min())
           .build();
-      return new BitSlicedRangeIndexCreator(_indexDir, metadata);
+    }
+
+    @Override
+    protected RawValueBasedInvertedIndexCreator newCreator() {
+      return new BitSlicedRangeIndexCreator(_indexDir, metadata());
     }
   }
 
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java
index 49c7e2f..9ab1e7d 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java
@@ -183,7 +183,7 @@ public final class PhysicalColumnIndexContainer implements 
ColumnIndexContainer
         if (version == RangeIndexCreator.VERSION) {
           _rangeIndex = new RangeIndexReaderImpl(buffer);
         } else if (version == BitSlicedRangeIndexCreator.VERSION) {
-          _rangeIndex = new BitSlicedRangeIndexReader(buffer);
+          _rangeIndex = new BitSlicedRangeIndexReader(buffer, metadata);
         } else {
           LOGGER.warn("Unknown range index version: {}, skip loading range 
index for column: {}", version,
               metadata.getColumnName());
diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java
 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java
index 032b4c3..41d34e1 100644
--- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java
+++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java
@@ -23,11 +23,13 @@ import java.nio.ByteBuffer;
 import javax.annotation.Nullable;
 import 
org.apache.pinot.segment.local.segment.creator.impl.inv.BitSlicedRangeIndexCreator;
 import org.apache.pinot.segment.local.utils.FPOrdering;
+import org.apache.pinot.segment.spi.ColumnMetadata;
 import org.apache.pinot.segment.spi.index.reader.RangeIndexReader;
 import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
 import org.roaringbitmap.RangeBitmap;
 import org.roaringbitmap.RoaringBitmap;
 import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
 
 
 public class BitSlicedRangeIndexReader implements 
RangeIndexReader<ImmutableRoaringBitmap> {
@@ -35,8 +37,11 @@ public class BitSlicedRangeIndexReader implements 
RangeIndexReader<ImmutableRoar
   private final PinotDataBuffer _dataBuffer;
   private final long _offset;
   private final long _min;
+  private final long _max;
+  private final long _numDocs;
 
-  public BitSlicedRangeIndexReader(PinotDataBuffer dataBuffer) {
+  public BitSlicedRangeIndexReader(PinotDataBuffer dataBuffer,
+      ColumnMetadata metadata) {
     _dataBuffer = dataBuffer;
     long offset = 0;
     int version = dataBuffer.getInt(offset);
@@ -45,30 +50,34 @@ public class BitSlicedRangeIndexReader implements 
RangeIndexReader<ImmutableRoar
     _min = dataBuffer.getLong(offset);
     offset += Long.BYTES;
     _offset = offset;
+    _max = metadata.hasDictionary()
+        ? metadata.getCardinality() - 1
+        : ((Number) metadata.getMaxValue()).longValue();
+    _numDocs = metadata.getTotalDocs();
   }
 
   @Nullable
   @Override
   public ImmutableRoaringBitmap getMatchingDocIds(int min, int max) {
-    return queryRangeBitmap(Math.max(min, _min) - _min, max - _min);
+    return queryRangeBitmap(Math.max(min, _min) - _min, max - _min, _max - 
_min);
   }
 
   @Nullable
   @Override
   public ImmutableRoaringBitmap getMatchingDocIds(long min, long max) {
-    return queryRangeBitmap(Math.max(min, _min) - _min, max - _min);
+    return queryRangeBitmap(Math.max(min, _min) - _min, max - _min, _max - 
_min);
   }
 
   @Nullable
   @Override
   public ImmutableRoaringBitmap getMatchingDocIds(float min, float max) {
-    return queryRangeBitmap(FPOrdering.ordinalOf(min), 
FPOrdering.ordinalOf(max));
+    return queryRangeBitmap(FPOrdering.ordinalOf(min), 
FPOrdering.ordinalOf(max), 0xFFFFFFFFL);
   }
 
   @Nullable
   @Override
   public ImmutableRoaringBitmap getMatchingDocIds(double min, double max) {
-    return queryRangeBitmap(FPOrdering.ordinalOf(min), 
FPOrdering.ordinalOf(max));
+    return queryRangeBitmap(FPOrdering.ordinalOf(min), 
FPOrdering.ordinalOf(max), 0xFFFFFFFFFFFFFFFFL);
   }
 
   // this index supports exact matches, so always return null for partial 
matches
@@ -97,12 +106,22 @@ public class BitSlicedRangeIndexReader implements 
RangeIndexReader<ImmutableRoar
     return null;
   }
 
-  private ImmutableRoaringBitmap queryRangeBitmap(long min, long max) {
+  private ImmutableRoaringBitmap queryRangeBitmap(long min, long max, long 
columnMax) {
     RangeBitmap rangeBitmap = mapRangeBitmap();
-    RoaringBitmap lte = rangeBitmap.lte(max);
-    RoaringBitmap gte = rangeBitmap.gte(min);
-    lte.and(gte);
-    return lte.toMutableRoaringBitmap();
+    if (Long.compareUnsigned(max, columnMax) < 0) {
+      RoaringBitmap lte = rangeBitmap.lte(max);
+      if (Long.compareUnsigned(min, 0) > 0) {
+        return rangeBitmap.gte(min, lte).toMutableRoaringBitmap();
+      }
+      return lte.toMutableRoaringBitmap();
+    } else {
+      if (Long.compareUnsigned(min, 0) > 0) {
+        return rangeBitmap.gte(min).toMutableRoaringBitmap();
+      }
+      MutableRoaringBitmap all = new MutableRoaringBitmap();
+      all.add(0, _numDocs);
+      return all;
+    }
   }
 
   private RangeBitmap mapRangeBitmap() {
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java
index e0a3d52..c06ebee 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java
@@ -161,7 +161,7 @@ public class BitSlicedIndexCreatorTest {
     }
     File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() + 
BITMAP_RANGE_INDEX_FILE_EXTENSION);
     try (PinotDataBuffer dataBuffer = 
PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) {
-      BitSlicedRangeIndexReader reader = new 
BitSlicedRangeIndexReader(dataBuffer);
+      BitSlicedRangeIndexReader reader = new 
BitSlicedRangeIndexReader(dataBuffer, metadata);
       int prev = Integer.MIN_VALUE;
       for (int quantile : dataset.quantiles()) {
         ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
@@ -171,6 +171,8 @@ public class BitSlicedIndexCreatorTest {
       }
       ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, 
Integer.MAX_VALUE);
       assertTrue(result != null && result.isEmpty());
+      assertEquals(reader.getMatchingDocIds(Integer.MIN_VALUE, 
Integer.MAX_VALUE),
+          dataset.scan(Integer.MIN_VALUE, Integer.MAX_VALUE));
     } finally {
       FileUtils.forceDelete(rangeIndexFile);
     }
@@ -187,7 +189,7 @@ public class BitSlicedIndexCreatorTest {
     }
     File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() + 
BITMAP_RANGE_INDEX_FILE_EXTENSION);
     try (PinotDataBuffer dataBuffer = 
PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) {
-      BitSlicedRangeIndexReader reader = new 
BitSlicedRangeIndexReader(dataBuffer);
+      BitSlicedRangeIndexReader reader = new 
BitSlicedRangeIndexReader(dataBuffer, metadata);
       long prev = Long.MIN_VALUE;
       for (long quantile : dataset.quantiles()) {
         ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
@@ -197,6 +199,8 @@ public class BitSlicedIndexCreatorTest {
       }
       ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, 
Long.MAX_VALUE);
       assertTrue(result != null && result.isEmpty());
+      assertEquals(reader.getMatchingDocIds(Long.MIN_VALUE, Long.MAX_VALUE),
+          dataset.scan(Long.MIN_VALUE, Long.MAX_VALUE));
     } finally {
       FileUtils.forceDelete(rangeIndexFile);
     }
@@ -213,7 +217,7 @@ public class BitSlicedIndexCreatorTest {
     }
     File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() + 
BITMAP_RANGE_INDEX_FILE_EXTENSION);
     try (PinotDataBuffer dataBuffer = 
PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) {
-      BitSlicedRangeIndexReader reader = new 
BitSlicedRangeIndexReader(dataBuffer);
+      BitSlicedRangeIndexReader reader = new 
BitSlicedRangeIndexReader(dataBuffer, metadata);
       float prev = Float.NEGATIVE_INFINITY;
       for (float quantile : dataset.quantiles()) {
         ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
@@ -223,6 +227,8 @@ public class BitSlicedIndexCreatorTest {
       }
       ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, 
Float.POSITIVE_INFINITY);
       assertTrue(result != null && result.isEmpty());
+      assertEquals(reader.getMatchingDocIds(Float.NEGATIVE_INFINITY, 
Float.POSITIVE_INFINITY),
+          dataset.scan(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
     } finally {
       FileUtils.forceDelete(rangeIndexFile);
     }
@@ -239,7 +245,7 @@ public class BitSlicedIndexCreatorTest {
     }
     File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() + 
BITMAP_RANGE_INDEX_FILE_EXTENSION);
     try (PinotDataBuffer dataBuffer = 
PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) {
-      BitSlicedRangeIndexReader reader = new 
BitSlicedRangeIndexReader(dataBuffer);
+      BitSlicedRangeIndexReader reader = new 
BitSlicedRangeIndexReader(dataBuffer, metadata);
       double prev = Double.NEGATIVE_INFINITY;
       for (double quantile : dataset.quantiles()) {
         ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
@@ -249,6 +255,8 @@ public class BitSlicedIndexCreatorTest {
       }
       ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, 
Double.POSITIVE_INFINITY);
       assertTrue(result != null && result.isEmpty());
+      assertEquals(reader.getMatchingDocIds(Double.NEGATIVE_INFINITY, 
Double.POSITIVE_INFINITY),
+          dataset.scan(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
     } finally {
       FileUtils.forceDelete(rangeIndexFile);
     }
diff --git a/pom.xml b/pom.xml
index 539243e..168ec2a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -436,7 +436,7 @@
       <dependency>
         <groupId>org.roaringbitmap</groupId>
         <artifactId>RoaringBitmap</artifactId>
-        <version>0.9.21</version>
+        <version>0.9.22</version>
       </dependency>
       <dependency>
         <groupId>com.101tec</groupId>

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org
For additional commands, e-mail: commits-h...@pinot.apache.org

Reply via email to