This is an automated email from the ASF dual-hosted git repository. kishoreg pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new af90f79 Improved range queries (#7513) af90f79 is described below commit af90f7972802f03b211c2c22becd8a838c46854b Author: Richard Startin <rich...@startree.ai> AuthorDate: Tue Oct 5 17:08:40 2021 +0100 Improved range queries (#7513) --- LICENSE-binary | 4 +-- .../org/apache/pinot/perf/BenchmarkRangeIndex.java | 15 +++++---- .../index/column/PhysicalColumnIndexContainer.java | 2 +- .../index/readers/BitSlicedRangeIndexReader.java | 39 ++++++++++++++++------ .../index/creator/BitSlicedIndexCreatorTest.java | 16 ++++++--- pom.xml | 2 +- 6 files changed, 54 insertions(+), 24 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 08ee41e..c8c9569 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -379,8 +379,8 @@ org.eclipse.jetty:jetty-util:9.4.39.v20210325 org.javassist:javassist:3.19.0-GA org.lz4:lz4-java:1.7.1 org.quartz-scheduler:quartz:2.3.2 -org.roaringbitmap:RoaringBitmap:0.9.21 -org.roaringbitmap:shims:0.9.21 +org.roaringbitmap:RoaringBitmap:0.9.22 +org.roaringbitmap:shims:0.9.22 org.webjars:swagger-ui:3.18.2 org.wildfly.openssl:wildfly-openssl:1.0.7.Final org.xerial.larray:larray-buffer:0.4.1 diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkRangeIndex.java b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkRangeIndex.java index ae02607..25079ca 100644 --- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkRangeIndex.java +++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkRangeIndex.java @@ -196,7 +196,7 @@ public class BenchmarkRangeIndex { @TearDown(Level.Trial) public void tearDown() throws IOException { - FileUtils.forceDelete(_indexDir); + FileUtils.deleteQuietly(_indexDir); } protected Comparable<?> max() { @@ -378,19 +378,22 @@ public class BenchmarkRangeIndex { public void setup() throws IOException { super.setup(); - _reader = new BitSlicedRangeIndexReader(_buffer); + _reader = new BitSlicedRangeIndexReader(_buffer, metadata()); } - @Override - protected RawValueBasedInvertedIndexCreator newCreator() { - ColumnMetadata metadata = new ColumnMetadataImpl.Builder() + private ColumnMetadata metadata() { + return new ColumnMetadataImpl.Builder() .setFieldSpec(_fieldSpec) .setTotalDocs(_numDocs) .setHasDictionary(false) .setMaxValue(max()) .setMinValue(min()) .build(); - return new BitSlicedRangeIndexCreator(_indexDir, metadata); + } + + @Override + protected RawValueBasedInvertedIndexCreator newCreator() { + return new BitSlicedRangeIndexCreator(_indexDir, metadata()); } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java index 49c7e2f..9ab1e7d 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java @@ -183,7 +183,7 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer if (version == RangeIndexCreator.VERSION) { _rangeIndex = new RangeIndexReaderImpl(buffer); } else if (version == BitSlicedRangeIndexCreator.VERSION) { - _rangeIndex = new BitSlicedRangeIndexReader(buffer); + _rangeIndex = new BitSlicedRangeIndexReader(buffer, metadata); } else { LOGGER.warn("Unknown range index version: {}, skip loading range index for column: {}", version, metadata.getColumnName()); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java index 032b4c3..41d34e1 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java @@ -23,11 +23,13 @@ import java.nio.ByteBuffer; import javax.annotation.Nullable; import org.apache.pinot.segment.local.segment.creator.impl.inv.BitSlicedRangeIndexCreator; import org.apache.pinot.segment.local.utils.FPOrdering; +import org.apache.pinot.segment.spi.ColumnMetadata; import org.apache.pinot.segment.spi.index.reader.RangeIndexReader; import org.apache.pinot.segment.spi.memory.PinotDataBuffer; import org.roaringbitmap.RangeBitmap; import org.roaringbitmap.RoaringBitmap; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; +import org.roaringbitmap.buffer.MutableRoaringBitmap; public class BitSlicedRangeIndexReader implements RangeIndexReader<ImmutableRoaringBitmap> { @@ -35,8 +37,11 @@ public class BitSlicedRangeIndexReader implements RangeIndexReader<ImmutableRoar private final PinotDataBuffer _dataBuffer; private final long _offset; private final long _min; + private final long _max; + private final long _numDocs; - public BitSlicedRangeIndexReader(PinotDataBuffer dataBuffer) { + public BitSlicedRangeIndexReader(PinotDataBuffer dataBuffer, + ColumnMetadata metadata) { _dataBuffer = dataBuffer; long offset = 0; int version = dataBuffer.getInt(offset); @@ -45,30 +50,34 @@ public class BitSlicedRangeIndexReader implements RangeIndexReader<ImmutableRoar _min = dataBuffer.getLong(offset); offset += Long.BYTES; _offset = offset; + _max = metadata.hasDictionary() + ? metadata.getCardinality() - 1 + : ((Number) metadata.getMaxValue()).longValue(); + _numDocs = metadata.getTotalDocs(); } @Nullable @Override public ImmutableRoaringBitmap getMatchingDocIds(int min, int max) { - return queryRangeBitmap(Math.max(min, _min) - _min, max - _min); + return queryRangeBitmap(Math.max(min, _min) - _min, max - _min, _max - _min); } @Nullable @Override public ImmutableRoaringBitmap getMatchingDocIds(long min, long max) { - return queryRangeBitmap(Math.max(min, _min) - _min, max - _min); + return queryRangeBitmap(Math.max(min, _min) - _min, max - _min, _max - _min); } @Nullable @Override public ImmutableRoaringBitmap getMatchingDocIds(float min, float max) { - return queryRangeBitmap(FPOrdering.ordinalOf(min), FPOrdering.ordinalOf(max)); + return queryRangeBitmap(FPOrdering.ordinalOf(min), FPOrdering.ordinalOf(max), 0xFFFFFFFFL); } @Nullable @Override public ImmutableRoaringBitmap getMatchingDocIds(double min, double max) { - return queryRangeBitmap(FPOrdering.ordinalOf(min), FPOrdering.ordinalOf(max)); + return queryRangeBitmap(FPOrdering.ordinalOf(min), FPOrdering.ordinalOf(max), 0xFFFFFFFFFFFFFFFFL); } // this index supports exact matches, so always return null for partial matches @@ -97,12 +106,22 @@ public class BitSlicedRangeIndexReader implements RangeIndexReader<ImmutableRoar return null; } - private ImmutableRoaringBitmap queryRangeBitmap(long min, long max) { + private ImmutableRoaringBitmap queryRangeBitmap(long min, long max, long columnMax) { RangeBitmap rangeBitmap = mapRangeBitmap(); - RoaringBitmap lte = rangeBitmap.lte(max); - RoaringBitmap gte = rangeBitmap.gte(min); - lte.and(gte); - return lte.toMutableRoaringBitmap(); + if (Long.compareUnsigned(max, columnMax) < 0) { + RoaringBitmap lte = rangeBitmap.lte(max); + if (Long.compareUnsigned(min, 0) > 0) { + return rangeBitmap.gte(min, lte).toMutableRoaringBitmap(); + } + return lte.toMutableRoaringBitmap(); + } else { + if (Long.compareUnsigned(min, 0) > 0) { + return rangeBitmap.gte(min).toMutableRoaringBitmap(); + } + MutableRoaringBitmap all = new MutableRoaringBitmap(); + all.add(0, _numDocs); + return all; + } } private RangeBitmap mapRangeBitmap() { diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java index e0a3d52..c06ebee 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java @@ -161,7 +161,7 @@ public class BitSlicedIndexCreatorTest { } File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() + BITMAP_RANGE_INDEX_FILE_EXTENSION); try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) { - BitSlicedRangeIndexReader reader = new BitSlicedRangeIndexReader(dataBuffer); + BitSlicedRangeIndexReader reader = new BitSlicedRangeIndexReader(dataBuffer, metadata); int prev = Integer.MIN_VALUE; for (int quantile : dataset.quantiles()) { ImmutableRoaringBitmap reference = dataset.scan(prev, quantile); @@ -171,6 +171,8 @@ public class BitSlicedIndexCreatorTest { } ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, Integer.MAX_VALUE); assertTrue(result != null && result.isEmpty()); + assertEquals(reader.getMatchingDocIds(Integer.MIN_VALUE, Integer.MAX_VALUE), + dataset.scan(Integer.MIN_VALUE, Integer.MAX_VALUE)); } finally { FileUtils.forceDelete(rangeIndexFile); } @@ -187,7 +189,7 @@ public class BitSlicedIndexCreatorTest { } File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() + BITMAP_RANGE_INDEX_FILE_EXTENSION); try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) { - BitSlicedRangeIndexReader reader = new BitSlicedRangeIndexReader(dataBuffer); + BitSlicedRangeIndexReader reader = new BitSlicedRangeIndexReader(dataBuffer, metadata); long prev = Long.MIN_VALUE; for (long quantile : dataset.quantiles()) { ImmutableRoaringBitmap reference = dataset.scan(prev, quantile); @@ -197,6 +199,8 @@ public class BitSlicedIndexCreatorTest { } ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, Long.MAX_VALUE); assertTrue(result != null && result.isEmpty()); + assertEquals(reader.getMatchingDocIds(Long.MIN_VALUE, Long.MAX_VALUE), + dataset.scan(Long.MIN_VALUE, Long.MAX_VALUE)); } finally { FileUtils.forceDelete(rangeIndexFile); } @@ -213,7 +217,7 @@ public class BitSlicedIndexCreatorTest { } File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() + BITMAP_RANGE_INDEX_FILE_EXTENSION); try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) { - BitSlicedRangeIndexReader reader = new BitSlicedRangeIndexReader(dataBuffer); + BitSlicedRangeIndexReader reader = new BitSlicedRangeIndexReader(dataBuffer, metadata); float prev = Float.NEGATIVE_INFINITY; for (float quantile : dataset.quantiles()) { ImmutableRoaringBitmap reference = dataset.scan(prev, quantile); @@ -223,6 +227,8 @@ public class BitSlicedIndexCreatorTest { } ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, Float.POSITIVE_INFINITY); assertTrue(result != null && result.isEmpty()); + assertEquals(reader.getMatchingDocIds(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY), + dataset.scan(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY)); } finally { FileUtils.forceDelete(rangeIndexFile); } @@ -239,7 +245,7 @@ public class BitSlicedIndexCreatorTest { } File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() + BITMAP_RANGE_INDEX_FILE_EXTENSION); try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) { - BitSlicedRangeIndexReader reader = new BitSlicedRangeIndexReader(dataBuffer); + BitSlicedRangeIndexReader reader = new BitSlicedRangeIndexReader(dataBuffer, metadata); double prev = Double.NEGATIVE_INFINITY; for (double quantile : dataset.quantiles()) { ImmutableRoaringBitmap reference = dataset.scan(prev, quantile); @@ -249,6 +255,8 @@ public class BitSlicedIndexCreatorTest { } ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, Double.POSITIVE_INFINITY); assertTrue(result != null && result.isEmpty()); + assertEquals(reader.getMatchingDocIds(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY), + dataset.scan(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY)); } finally { FileUtils.forceDelete(rangeIndexFile); } diff --git a/pom.xml b/pom.xml index 539243e..168ec2a 100644 --- a/pom.xml +++ b/pom.xml @@ -436,7 +436,7 @@ <dependency> <groupId>org.roaringbitmap</groupId> <artifactId>RoaringBitmap</artifactId> - <version>0.9.21</version> + <version>0.9.22</version> </dependency> <dependency> <groupId>com.101tec</groupId> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org