This is an automated email from the ASF dual-hosted git repository. richardstartin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 6eff17754a Enable range index on raw columns and push counts down to range index when v2 is in use (#8513) 6eff17754a is described below commit 6eff17754a0b0ffdfb8670b54b5ada38961b7917 Author: Richard Startin <rich...@startree.ai> AuthorDate: Tue Apr 19 21:04:10 2022 +0100 Enable range index on raw columns and push counts down to range index when v2 is in use (#8513) * push counts down to range index when v2 is in use * enable range indexes on raw columns * add query tests for range indexes, cleanup RangeIndexBasedFilterOperator * fix method name --- .../filter/RangeIndexBasedFilterOperator.java | 305 ++++++++++++++++----- .../predicate/RangePredicateEvaluatorFactory.java | 8 +- .../pinot/queries/FastFilteredCountTest.java | 19 +- .../org/apache/pinot/queries/RangeQueriesTest.java | 201 ++++++++++++++ .../index/column/PhysicalColumnIndexContainer.java | 16 +- .../index/readers/BitSlicedRangeIndexReader.java | 39 ++- .../index/readers/RangeIndexReaderImpl.java | 41 +++ .../index/creator/BitSlicedIndexCreatorTest.java | 18 +- .../segment/spi/index/reader/RangeIndexReader.java | 46 ++++ 9 files changed, 598 insertions(+), 95 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java index 44f9d72fbd..61494b0347 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/RangeIndexBasedFilterOperator.java @@ -44,95 +44,77 @@ public class RangeIndexBasedFilterOperator extends BaseFilterOperator { private static final String EXPLAIN_NAME = "FILTER_RANGE_INDEX"; - // NOTE: Range index can only apply to dictionary-encoded columns for now - // TODO: Support raw index columns + private final RangeEvaluator _rangeEvaluator; private final PredicateEvaluator _rangePredicateEvaluator; private final DataSource _dataSource; private final int _numDocs; + @SuppressWarnings("unchecked") public RangeIndexBasedFilterOperator(PredicateEvaluator rangePredicateEvaluator, DataSource dataSource, int numDocs) { _rangePredicateEvaluator = rangePredicateEvaluator; + _rangeEvaluator = RangeEvaluator.of((RangeIndexReader<ImmutableRoaringBitmap>) dataSource.getRangeIndex(), + rangePredicateEvaluator); _dataSource = dataSource; _numDocs = numDocs; } @Override protected FilterBlock getNextBlock() { - @SuppressWarnings("unchecked") - RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader = - (RangeIndexReader<ImmutableRoaringBitmap>) _dataSource.getRangeIndex(); - assert rangeIndexReader != null; + if (_rangeEvaluator.isExact()) { + ImmutableRoaringBitmap matches = _rangeEvaluator.getMatchingDocIds(); + recordFilter(matches); + return new FilterBlock(new BitmapDocIdSet(matches, _numDocs)); + } + return evaluateLegacyRangeFilter(); + } - ImmutableRoaringBitmap matches; + private FilterBlock evaluateLegacyRangeFilter() { + ImmutableRoaringBitmap matches = _rangeEvaluator.getMatchingDocIds(); // if the implementation cannot match the entire query exactly, it will // yield partial matches, which need to be verified by scanning. If it // can answer the query exactly, this will be null. - ImmutableRoaringBitmap partialMatches; - int firstRangeId; - int lastRangeId; - if (_rangePredicateEvaluator instanceof SortedDictionaryBasedRangePredicateEvaluator) { - // NOTE: End dictionary id is exclusive in OfflineDictionaryBasedRangePredicateEvaluator. - int startDictId = ((SortedDictionaryBasedRangePredicateEvaluator) _rangePredicateEvaluator).getStartDictId(); - int endDictId = ((SortedDictionaryBasedRangePredicateEvaluator) _rangePredicateEvaluator).getEndDictId() - 1; - matches = rangeIndexReader.getMatchingDocIds(startDictId, endDictId); - partialMatches = rangeIndexReader.getPartiallyMatchingDocIds(startDictId, endDictId); - } else { - switch (_rangePredicateEvaluator.getDataType()) { - case INT: { - int lowerBound = ((IntRawValueBasedRangePredicateEvaluator) _rangePredicateEvaluator).geLowerBound(); - int upperBound = ((IntRawValueBasedRangePredicateEvaluator) _rangePredicateEvaluator).getUpperBound(); - matches = rangeIndexReader.getMatchingDocIds(lowerBound, upperBound); - partialMatches = rangeIndexReader.getPartiallyMatchingDocIds(lowerBound, upperBound); - break; - } - case LONG: { - long lowerBound = ((LongRawValueBasedRangePredicateEvaluator) _rangePredicateEvaluator).geLowerBound(); - long upperBound = ((LongRawValueBasedRangePredicateEvaluator) _rangePredicateEvaluator).getUpperBound(); - matches = rangeIndexReader.getMatchingDocIds(lowerBound, upperBound); - partialMatches = rangeIndexReader.getPartiallyMatchingDocIds(lowerBound, upperBound); - break; - } - case FLOAT: { - float lowerBound = ((FloatRawValueBasedRangePredicateEvaluator) _rangePredicateEvaluator).geLowerBound(); - float upperBound = ((FloatRawValueBasedRangePredicateEvaluator) _rangePredicateEvaluator).getUpperBound(); - matches = rangeIndexReader.getMatchingDocIds(lowerBound, upperBound); - partialMatches = rangeIndexReader.getPartiallyMatchingDocIds(lowerBound, upperBound); - break; - } - case DOUBLE: { - double lowerBound = ((DoubleRawValueBasedRangePredicateEvaluator) _rangePredicateEvaluator).geLowerBound(); - double upperBound = ((DoubleRawValueBasedRangePredicateEvaluator) _rangePredicateEvaluator).getUpperBound(); - matches = rangeIndexReader.getMatchingDocIds(lowerBound, upperBound); - partialMatches = rangeIndexReader.getPartiallyMatchingDocIds(lowerBound, upperBound); - break; - } - default: - throw new IllegalStateException("String and Bytes data type not supported for Range Indexing"); - } - } + ImmutableRoaringBitmap partialMatches = _rangeEvaluator.getPartiallyMatchingDocIds(); // this branch is likely until RangeIndexReader reimplemented and enabled by default - if (partialMatches != null) { - // Need to scan the first and last range as they might be partially matched - ScanBasedFilterOperator scanBasedFilterOperator = - new ScanBasedFilterOperator(_rangePredicateEvaluator, _dataSource, _numDocs); - FilterBlockDocIdSet scanBasedDocIdSet = scanBasedFilterOperator.getNextBlock().getBlockDocIdSet(); - MutableRoaringBitmap docIds = ((ScanBasedDocIdIterator) scanBasedDocIdSet.iterator()).applyAnd(partialMatches); - if (matches != null) { - docIds.or(matches); - } - return new FilterBlock(new BitmapDocIdSet(docIds, _numDocs) { - // Override this method to reflect the entries scanned - @Override - public long getNumEntriesScannedInFilter() { - return scanBasedDocIdSet.getNumEntriesScannedInFilter(); - } - }); - } else { - recordFilter(matches); + if (partialMatches == null) { return new FilterBlock(new BitmapDocIdSet(matches == null ? new MutableRoaringBitmap() : matches, _numDocs)); } + // Need to scan the first and last range as they might be partially matched + ScanBasedFilterOperator scanBasedFilterOperator = + new ScanBasedFilterOperator(_rangePredicateEvaluator, _dataSource, _numDocs); + FilterBlockDocIdSet scanBasedDocIdSet = scanBasedFilterOperator.getNextBlock().getBlockDocIdSet(); + MutableRoaringBitmap docIds = ((ScanBasedDocIdIterator) scanBasedDocIdSet.iterator()).applyAnd(partialMatches); + if (matches != null) { + docIds.or(matches); + } + recordFilter(matches); + return new FilterBlock(new BitmapDocIdSet(docIds, _numDocs) { + // Override this method to reflect the entries scanned + @Override + public long getNumEntriesScannedInFilter() { + return scanBasedDocIdSet.getNumEntriesScannedInFilter(); + } + }); + } + + @Override + public boolean canOptimizeCount() { + return _rangeEvaluator.isExact(); + } + + @Override + public int getNumMatchingDocs() { + return _rangeEvaluator.getNumMatchingDocs(); + } + + @Override + public boolean canProduceBitmaps() { + return _rangeEvaluator.isExact(); } + @Override + public BitmapCollection getBitmaps() { + return new BitmapCollection(_numDocs, false, _rangeEvaluator.getMatchingDocIds()); + } @Override public List<Operator> getChildOperators() { @@ -141,10 +123,189 @@ public class RangeIndexBasedFilterOperator extends BaseFilterOperator { @Override public String toExplainString() { - StringBuilder stringBuilder = new StringBuilder(EXPLAIN_NAME).append("(indexLookUp:range_index"); - stringBuilder.append(",operator:").append(_rangePredicateEvaluator.getPredicateType()); - stringBuilder.append(",predicate:").append(_rangePredicateEvaluator.getPredicate().toString()); - return stringBuilder.append(')').toString(); + return EXPLAIN_NAME + "(indexLookUp:range_index" + + ",operator:" + _rangePredicateEvaluator.getPredicateType() + + ",predicate:" + _rangePredicateEvaluator.getPredicate().toString() + + ')'; + } + + interface RangeEvaluator { + static RangeEvaluator of(RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader, + PredicateEvaluator predicateEvaluator) { + if (predicateEvaluator instanceof SortedDictionaryBasedRangePredicateEvaluator) { + return new IntRangeEvaluator(rangeIndexReader, + (SortedDictionaryBasedRangePredicateEvaluator) predicateEvaluator); + } else { + switch (predicateEvaluator.getDataType()) { + case INT: + return new IntRangeEvaluator(rangeIndexReader, + (IntRawValueBasedRangePredicateEvaluator) predicateEvaluator); + case LONG: + return new LongRangeEvaluator(rangeIndexReader, + (LongRawValueBasedRangePredicateEvaluator) predicateEvaluator); + case FLOAT: + return new FloatRangeEvaluator(rangeIndexReader, + (FloatRawValueBasedRangePredicateEvaluator) predicateEvaluator); + case DOUBLE: + return new DoubleRangeEvaluator(rangeIndexReader, + (DoubleRawValueBasedRangePredicateEvaluator) predicateEvaluator); + default: + throw new IllegalStateException("String and Bytes data type not supported for Range Indexing"); + } + } + } + + ImmutableRoaringBitmap getMatchingDocIds(); + + ImmutableRoaringBitmap getPartiallyMatchingDocIds(); + + int getNumMatchingDocs(); + + boolean isExact(); + } + + private static final class IntRangeEvaluator implements RangeEvaluator { + final RangeIndexReader<ImmutableRoaringBitmap> _rangeIndexReader; + final int _min; + final int _max; + + private IntRangeEvaluator(RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader, int min, int max) { + _rangeIndexReader = rangeIndexReader; + _min = min; + _max = max; + } + + IntRangeEvaluator( + RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader, + IntRawValueBasedRangePredicateEvaluator predicateEvaluator) { + this(rangeIndexReader, predicateEvaluator.getLowerBound(), predicateEvaluator.getUpperBound()); + } + + IntRangeEvaluator( + RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader, + SortedDictionaryBasedRangePredicateEvaluator predicateEvaluator) { + // NOTE: End dictionary id is exclusive in OfflineDictionaryBasedRangePredicateEvaluator. + this(rangeIndexReader, predicateEvaluator.getStartDictId(), predicateEvaluator.getEndDictId() - 1); + } + + @Override + public ImmutableRoaringBitmap getMatchingDocIds() { + return _rangeIndexReader.getMatchingDocIds(_min, _max); + } + + @Override + public ImmutableRoaringBitmap getPartiallyMatchingDocIds() { + return _rangeIndexReader.getPartiallyMatchingDocIds(_min, _max); + } + + @Override + public int getNumMatchingDocs() { + return _rangeIndexReader.getNumMatchingDocs(_min, _max); + } + + @Override + public boolean isExact() { + return _rangeIndexReader.isExact(); + } + } + + private static final class LongRangeEvaluator implements RangeEvaluator { + final RangeIndexReader<ImmutableRoaringBitmap> _rangeIndexReader; + final long _min; + final long _max; + + LongRangeEvaluator(RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader, + LongRawValueBasedRangePredicateEvaluator predicateEvaluator) { + _rangeIndexReader = rangeIndexReader; + _min = predicateEvaluator.getLowerBound(); + _max = predicateEvaluator.getUpperBound(); + } + + @Override + public ImmutableRoaringBitmap getMatchingDocIds() { + return _rangeIndexReader.getMatchingDocIds(_min, _max); + } + + @Override + public ImmutableRoaringBitmap getPartiallyMatchingDocIds() { + return _rangeIndexReader.getPartiallyMatchingDocIds(_min, _max); + } + + @Override + public int getNumMatchingDocs() { + return _rangeIndexReader.getNumMatchingDocs(_min, _max); + } + + @Override + public boolean isExact() { + return _rangeIndexReader.isExact(); + } + } + + private static final class FloatRangeEvaluator implements RangeEvaluator { + final RangeIndexReader<ImmutableRoaringBitmap> _rangeIndexReader; + final float _min; + final float _max; + + FloatRangeEvaluator(RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader, + FloatRawValueBasedRangePredicateEvaluator predicateEvaluator) { + _rangeIndexReader = rangeIndexReader; + _min = predicateEvaluator.getLowerBound(); + _max = predicateEvaluator.getUpperBound(); + } + + @Override + public ImmutableRoaringBitmap getMatchingDocIds() { + return _rangeIndexReader.getMatchingDocIds(_min, _max); + } + + @Override + public ImmutableRoaringBitmap getPartiallyMatchingDocIds() { + return _rangeIndexReader.getPartiallyMatchingDocIds(_min, _max); + } + + @Override + public int getNumMatchingDocs() { + return _rangeIndexReader.getNumMatchingDocs(_min, _max); + } + + @Override + public boolean isExact() { + return _rangeIndexReader.isExact(); + } + } + + private static final class DoubleRangeEvaluator implements RangeEvaluator { + final RangeIndexReader<ImmutableRoaringBitmap> _rangeIndexReader; + final double _min; + final double _max; + + DoubleRangeEvaluator(RangeIndexReader<ImmutableRoaringBitmap> rangeIndexReader, + DoubleRawValueBasedRangePredicateEvaluator predicateEvaluator) { + _rangeIndexReader = rangeIndexReader; + _min = predicateEvaluator.getLowerBound(); + _max = predicateEvaluator.getUpperBound(); + } + + @Override + public ImmutableRoaringBitmap getMatchingDocIds() { + return _rangeIndexReader.getMatchingDocIds(_min, _max); + } + + @Override + public ImmutableRoaringBitmap getPartiallyMatchingDocIds() { + return _rangeIndexReader.getPartiallyMatchingDocIds(_min, _max); + } + + @Override + public int getNumMatchingDocs() { + return _rangeIndexReader.getNumMatchingDocs(_min, _max); + } + + @Override + public boolean isExact() { + return _rangeIndexReader.isExact(); + } } private void recordFilter(ImmutableRoaringBitmap bitmap) { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java index 9bf184e27c..437749890b 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java @@ -280,7 +280,7 @@ public class RangePredicateEvaluatorFactory { _upperInclusive = upperInclusive; } - public int geLowerBound() { + public int getLowerBound() { return _lowerBound; } @@ -325,7 +325,7 @@ public class RangePredicateEvaluatorFactory { _upperInclusive = upperInclusive; } - public long geLowerBound() { + public long getLowerBound() { return _lowerBound; } @@ -370,7 +370,7 @@ public class RangePredicateEvaluatorFactory { _upperInclusive = upperInclusive; } - public float geLowerBound() { + public float getLowerBound() { return _lowerBound; } @@ -415,7 +415,7 @@ public class RangePredicateEvaluatorFactory { _upperInclusive = upperInclusive; } - public double geLowerBound() { + public double getLowerBound() { return _lowerBound; } diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/FastFilteredCountTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/FastFilteredCountTest.java index 7771476460..19bcc301be 100644 --- a/pinot-core/src/test/java/org/apache/pinot/queries/FastFilteredCountTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/queries/FastFilteredCountTest.java @@ -64,12 +64,14 @@ public class FastFilteredCountTest extends BaseQueriesTest { private static final String CLASSIFICATION_COLUMN = "class"; private static final String TEXT_COLUMN = "textCol"; private static final String JSON_COLUMN = "jsonCol"; + private static final String INT_RANGE_COLUMN = "intRangeCol"; private static final Schema SCHEMA = new Schema.SchemaBuilder() .addSingleValueDimension(SORTED_COLUMN, FieldSpec.DataType.INT) .addSingleValueDimension(CLASSIFICATION_COLUMN, FieldSpec.DataType.INT) .addSingleValueDimension(TEXT_COLUMN, FieldSpec.DataType.STRING) .addSingleValueDimension(JSON_COLUMN, FieldSpec.DataType.JSON) + .addSingleValueDimension(INT_RANGE_COLUMN, FieldSpec.DataType.INT) .build(); private static final TableConfig TABLE_CONFIG = @@ -108,6 +110,7 @@ public class FastFilteredCountTest extends BaseQueriesTest { record.putValue(SORTED_COLUMN, i); record.putValue(TEXT_COLUMN, "text" + (i % BUCKET_SIZE)); record.putValue(JSON_COLUMN, "{\"field\":" + (i % BUCKET_SIZE) + "}"); + record.putValue(INT_RANGE_COLUMN, NUM_RECORDS - i); records.add(record); } @@ -124,6 +127,7 @@ public class FastFilteredCountTest extends BaseQueriesTest { indexLoadingConfig.setInvertedIndexColumns(new HashSet<>(Arrays.asList(CLASSIFICATION_COLUMN, SORTED_COLUMN))); indexLoadingConfig.setTextIndexColumns(Collections.singleton(TEXT_COLUMN)); indexLoadingConfig.setJsonIndexColumns(Collections.singleton(JSON_COLUMN)); + indexLoadingConfig.setRangeIndexColumns(Collections.singleton(INT_RANGE_COLUMN)); ImmutableSegment immutableSegment = ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), indexLoadingConfig); @@ -284,7 +288,20 @@ public class FastFilteredCountTest extends BaseQueriesTest { + " where " + SORTED_COLUMN + " >= 500" + " and " + CLASSIFICATION_COLUMN + " <> 0" + " and not JSON_MATCH(" + JSON_COLUMN + ", '\"$.field\"=0')" - + " and not TEXT_MATCH(" + TEXT_COLUMN + ", 'text0')", bucketCountComplement / 2 + 1} + + " and not TEXT_MATCH(" + TEXT_COLUMN + ", 'text0')", bucketCountComplement / 2 + 1}, + {"select count(*) from " + RAW_TABLE_NAME + + " where " + INT_RANGE_COLUMN + " >= " + min + + " and " + INT_RANGE_COLUMN + " < " + max, max - min}, + {"select count(*) from " + RAW_TABLE_NAME + + " where " + INT_RANGE_COLUMN + " < " + max, max - 1}, + {"select count(*) from " + RAW_TABLE_NAME + + " where " + INT_RANGE_COLUMN + " not between " + min + " and " + max, NUM_RECORDS - max + min - 1}, + {"select count(*) from " + RAW_TABLE_NAME + + " where " + INT_RANGE_COLUMN + " between " + min + " and " + max + + " and " + CLASSIFICATION_COLUMN + " = 0", bucketCount - (min + NUM_RECORDS - max) / BUCKET_SIZE}, + {"select count(*) from " + RAW_TABLE_NAME + + " where " + INT_RANGE_COLUMN + " not between " + min + " and " + max + + " and " + CLASSIFICATION_COLUMN + " = 0", (min + NUM_RECORDS - max) / BUCKET_SIZE} }; } diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/RangeQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/RangeQueriesTest.java new file mode 100644 index 0000000000..58e31a4ec2 --- /dev/null +++ b/pinot-core/src/test/java/org/apache/pinot/queries/RangeQueriesTest.java @@ -0,0 +1,201 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.queries; + +import java.io.File; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import org.apache.commons.io.FileUtils; +import org.apache.pinot.core.common.Operator; +import org.apache.pinot.core.operator.query.FastFilteredCountOperator; +import org.apache.pinot.core.operator.query.SelectionOnlyOperator; +import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader; +import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl; +import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; +import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader; +import org.apache.pinot.segment.spi.ImmutableSegment; +import org.apache.pinot.segment.spi.IndexSegment; +import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.data.readers.GenericRow; +import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + + +public class RangeQueriesTest extends BaseQueriesTest { + + private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "RangeQueriesTest"); + private static final String RAW_TABLE_NAME = "testTable"; + private static final String SEGMENT_NAME = "testSegment"; + + private static final int NUM_RECORDS = 1000; + private static final int MAX_VALUE = NUM_RECORDS * 100; + private static final String DICTIONARIZED_INT_COL = "dictionarizedIntCol"; + private static final String RAW_INT_COL = "rawIntCol"; + private static final String RAW_LONG_COL = "rawLongCol"; + private static final String RAW_FLOAT_COL = "rawFloatCol"; + private static final String RAW_DOUBLE_COL = "rawDoubleCol"; + + private static final Schema SCHEMA = new Schema.SchemaBuilder() + .addSingleValueDimension(DICTIONARIZED_INT_COL, FieldSpec.DataType.INT) + .addSingleValueDimension(RAW_INT_COL, FieldSpec.DataType.INT) + .addSingleValueDimension(RAW_LONG_COL, FieldSpec.DataType.LONG) + .addSingleValueDimension(RAW_FLOAT_COL, FieldSpec.DataType.FLOAT) + .addSingleValueDimension(RAW_DOUBLE_COL, FieldSpec.DataType.DOUBLE) + .build(); + + private static final TableConfig TABLE_CONFIG = + new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME) + .setNoDictionaryColumns(Arrays.asList(RAW_INT_COL, RAW_LONG_COL, RAW_FLOAT_COL, RAW_DOUBLE_COL)) + .setRangeIndexColumns(Arrays.asList(DICTIONARIZED_INT_COL, RAW_INT_COL, RAW_LONG_COL, RAW_FLOAT_COL, + RAW_DOUBLE_COL)) + .build(); + + private IndexSegment _indexSegment; + private List<IndexSegment> _indexSegments; + + @Override + protected String getFilter() { + return ""; + } + + @Override + protected IndexSegment getIndexSegment() { + return _indexSegment; + } + + @Override + protected List<IndexSegment> getIndexSegments() { + return _indexSegments; + } + + @BeforeClass + public void setUp() + throws Exception { + FileUtils.deleteQuietly(INDEX_DIR); + List<GenericRow> records = new ArrayList<>(NUM_RECORDS); + for (int i = 0; i < NUM_RECORDS; i++) { + GenericRow record = new GenericRow(); + int intValue = ((MAX_VALUE + NUM_RECORDS / 2) - (i * 100)) % MAX_VALUE; + record.putValue(DICTIONARIZED_INT_COL, intValue); + record.putValue(RAW_INT_COL, intValue); + record.putValue(RAW_LONG_COL, (long) intValue); + record.putValue(RAW_FLOAT_COL, (float) intValue); + record.putValue(RAW_DOUBLE_COL, (double) intValue); + records.add(record); + } + + SegmentGeneratorConfig segmentGeneratorConfig = new SegmentGeneratorConfig(TABLE_CONFIG, SCHEMA); + segmentGeneratorConfig.setTableName(RAW_TABLE_NAME); + segmentGeneratorConfig.setSegmentName(SEGMENT_NAME); + segmentGeneratorConfig.setOutDir(INDEX_DIR.getPath()); + + SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl(); + driver.init(segmentGeneratorConfig, new GenericRowRecordReader(records)); + driver.build(); + + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(); + indexLoadingConfig.setRangeIndexColumns( + new HashSet<>(Arrays.asList(DICTIONARIZED_INT_COL, RAW_INT_COL, RAW_LONG_COL, RAW_FLOAT_COL, RAW_DOUBLE_COL))); + + ImmutableSegment immutableSegment = + ImmutableSegmentLoader.load(new File(INDEX_DIR, SEGMENT_NAME), indexLoadingConfig); + _indexSegment = immutableSegment; + _indexSegments = Arrays.asList(immutableSegment, immutableSegment); + } + + @DataProvider + public static Object[][] selectionTestCases() { + return new Object[][]{ + {buildSelectionQuery(DICTIONARIZED_INT_COL, 250, 500), 250, 500}, + {buildSelectionQuery(RAW_INT_COL, 250, 500), 250, 500}, + {buildSelectionQuery(RAW_LONG_COL, 250, 500), 250, 500}, + {buildSelectionQuery(RAW_FLOAT_COL, 250, 500), 250, 500}, + {buildSelectionQuery(RAW_DOUBLE_COL, 250, 500), 250, 500}, + }; + } + + private static String buildSelectionQuery(String filterCol, Number min, Number max) { + return "select " + RAW_INT_COL + " from " + RAW_TABLE_NAME + " where " + filterCol + " between " + + buildFilter(filterCol, min, max); + } + + @DataProvider + public static Object[][] countTestCases() { + return new Object[][]{ + {buildCountQuery(DICTIONARIZED_INT_COL, 250, 500), 3}, + {buildCountQuery(RAW_INT_COL, 250, 500), 3}, + {buildCountQuery(RAW_LONG_COL, 250, 500), 3}, + {buildCountQuery(RAW_FLOAT_COL, 250, 500), 3}, + {buildCountQuery(RAW_DOUBLE_COL, 250, 500), 3}, + }; + } + + private static String buildCountQuery(String filterCol, Number min, Number max) { + return "select count(*) from " + RAW_TABLE_NAME + " where " + filterCol + " between " + + buildFilter(filterCol, min, max); + } + + private static String buildFilter(String filterCol, Number min, Number max) { + switch (filterCol) { + case DICTIONARIZED_INT_COL: + case RAW_INT_COL: + case RAW_LONG_COL: + return min.intValue() + " and " + max.intValue(); + case RAW_FLOAT_COL: + case RAW_DOUBLE_COL: + return min.doubleValue() + " and " + max.doubleValue(); + default: + throw new AssertionError("unexpected column: " + filterCol); + } + } + + @Test(dataProvider = "selectionTestCases") + public void testSelectionOverRangeFilter(String query, int min, int max) { + Operator<?> operator = getOperatorForSqlQuery(query); + assertTrue(operator instanceof SelectionOnlyOperator); + for (Object[] row : Objects.requireNonNull(((SelectionOnlyOperator) operator).nextBlock().getSelectionResult())) { + int value = (int) row[0]; + assertTrue(value >= min); + assertTrue(value <= max); + } + } + + @Test(dataProvider = "countTestCases") + public void testCountOverRangeFilter(String query, int expectedCount) { + Operator<?> operator = getOperatorForSqlQuery(query); + assertTrue(operator instanceof FastFilteredCountOperator); + List<Object> aggregationResult = ((FastFilteredCountOperator) operator).nextBlock().getAggregationResult(); + assertNotNull(aggregationResult); + assertEquals(aggregationResult.size(), 1); + assertEquals(((Number) aggregationResult.get(0)).intValue(), expectedCount, query); + } +} diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java index 591d9ac24a..139a114856 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/column/PhysicalColumnIndexContainer.java @@ -123,6 +123,13 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer _bloomFilter = null; } + if (loadRangeIndex) { + PinotDataBuffer buffer = segmentReader.getIndexFor(columnName, ColumnIndexType.RANGE_INDEX); + _rangeIndex = indexReaderProvider.newRangeIndexReader(buffer, metadata); + } else { + _rangeIndex = null; + } + PinotDataBuffer fwdIndexBuffer = segmentReader.getIndexFor(columnName, ColumnIndexType.FORWARD_INDEX); if (metadata.hasDictionary()) { // Dictionary-based index @@ -135,7 +142,6 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer SortedIndexReader<?> sortedIndexReader = indexReaderProvider.newSortedIndexReader(fwdIndexBuffer, metadata); _forwardIndex = sortedIndexReader; _invertedIndex = sortedIndexReader; - _rangeIndex = null; _fstIndex = null; return; } @@ -154,18 +160,10 @@ public final class PhysicalColumnIndexContainer implements ColumnIndexContainer } else { _fstIndex = null; } - - if (loadRangeIndex) { - PinotDataBuffer buffer = segmentReader.getIndexFor(columnName, ColumnIndexType.RANGE_INDEX); - _rangeIndex = indexReaderProvider.newRangeIndexReader(buffer, metadata); - } else { - _rangeIndex = null; - } } else { // Raw index _forwardIndex = indexReaderProvider.newForwardIndexReader(fwdIndexBuffer, metadata); _dictionary = null; - _rangeIndex = null; _invertedIndex = null; _fstIndex = null; } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java index b0d2a08521..b85ea612d3 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/BitSlicedRangeIndexReader.java @@ -55,25 +55,41 @@ public class BitSlicedRangeIndexReader implements RangeIndexReader<ImmutableRoar _numDocs = metadata.getTotalDocs(); } - @Nullable + @Override + public int getNumMatchingDocs(int min, int max) { + return queryRangeBitmapCardinality(Math.max(min, _min) - _min, max - _min, _max - _min); + } + + @Override + public int getNumMatchingDocs(long min, long max) { + return queryRangeBitmapCardinality(Math.max(min, _min) - _min, max - _min, _max - _min); + } + + @Override + public int getNumMatchingDocs(float min, float max) { + return queryRangeBitmapCardinality(FPOrdering.ordinalOf(min), FPOrdering.ordinalOf(max), 0xFFFFFFFFL); + } + + @Override + public int getNumMatchingDocs(double min, double max) { + return queryRangeBitmapCardinality(FPOrdering.ordinalOf(min), FPOrdering.ordinalOf(max), 0xFFFFFFFFFFFFFFFFL); + } + @Override public ImmutableRoaringBitmap getMatchingDocIds(int min, int max) { return queryRangeBitmap(Math.max(min, _min) - _min, max - _min, _max - _min); } - @Nullable @Override public ImmutableRoaringBitmap getMatchingDocIds(long min, long max) { return queryRangeBitmap(Math.max(min, _min) - _min, max - _min, _max - _min); } - @Nullable @Override public ImmutableRoaringBitmap getMatchingDocIds(float min, float max) { return queryRangeBitmap(FPOrdering.ordinalOf(min), FPOrdering.ordinalOf(max), 0xFFFFFFFFL); } - @Nullable @Override public ImmutableRoaringBitmap getMatchingDocIds(double min, double max) { return queryRangeBitmap(FPOrdering.ordinalOf(min), FPOrdering.ordinalOf(max), 0xFFFFFFFFFFFFFFFFL); @@ -122,6 +138,21 @@ public class BitSlicedRangeIndexReader implements RangeIndexReader<ImmutableRoar } } + private int queryRangeBitmapCardinality(long min, long max, long columnMax) { + RangeBitmap rangeBitmap = mapRangeBitmap(); + if (Long.compareUnsigned(max, columnMax) < 0) { + if (Long.compareUnsigned(min, 0) > 0) { + return (int) rangeBitmap.betweenCardinality(min, max); + } + return (int) rangeBitmap.lteCardinality(max); + } else { + if (Long.compareUnsigned(min, 0) > 0) { + return (int) rangeBitmap.gteCardinality(min); + } + return (int) _numDocs; + } + } + private RangeBitmap mapRangeBitmap() { // note that this is a very cheap operation, no deserialization is required ByteBuffer buffer = _dataBuffer.toDirectByteBuffer(_offset, (int) (_dataBuffer.size() - _offset)); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/RangeIndexReaderImpl.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/RangeIndexReaderImpl.java index 510fa80e07..c6211ad71c 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/RangeIndexReaderImpl.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/RangeIndexReaderImpl.java @@ -103,6 +103,47 @@ public class RangeIndexReaderImpl implements RangeIndexReader<ImmutableRoaringBi } } + @Override + public boolean isExact() { + return false; + } + + /** + * {@inheritDoc} + */ + @Override + public int getNumMatchingDocs(int min, int max) { + ImmutableRoaringBitmap matching = getMatchingDocIds(min, max); + return matching == null ? 0 : matching.getCardinality(); + } + + /** + * {@inheritDoc} + */ + @Override + public int getNumMatchingDocs(long min, long max) { + ImmutableRoaringBitmap matching = getMatchingDocIds(min, max); + return matching == null ? 0 : matching.getCardinality(); + } + + /** + * {@inheritDoc} + */ + @Override + public int getNumMatchingDocs(float min, float max) { + ImmutableRoaringBitmap matching = getMatchingDocIds(min, max); + return matching == null ? 0 : matching.getCardinality(); + } + + /** + * {@inheritDoc} + */ + @Override + public int getNumMatchingDocs(double min, double max) { + ImmutableRoaringBitmap matching = getMatchingDocIds(min, max); + return matching == null ? 0 : matching.getCardinality(); + } + /** * {@inheritDoc} */ diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java index 0ad213c9c2..cb74ab5ff2 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/BitSlicedIndexCreatorTest.java @@ -151,7 +151,7 @@ public class BitSlicedIndexCreatorTest { private void testInt(Dataset<int[]> dataset) throws IOException { ColumnMetadata metadata = dataset.toColumnMetadata(); - try (BitSlicedRangeIndexCreator creator = newBitslicedIndexCreator(metadata)) { + try (BitSlicedRangeIndexCreator creator = newBitSlicedIndexCreator(metadata)) { for (int value : dataset.values()) { creator.add(value); } @@ -164,7 +164,9 @@ public class BitSlicedIndexCreatorTest { for (int quantile : dataset.quantiles()) { ImmutableRoaringBitmap reference = dataset.scan(prev, quantile); ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev, quantile); + int resultCount = reader.getNumMatchingDocs(prev, quantile); assertEquals(result, reference); + assertEquals(resultCount, result == null ? 0 : result.getCardinality()); prev = quantile; } ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, Integer.MAX_VALUE); @@ -179,7 +181,7 @@ public class BitSlicedIndexCreatorTest { private void testLong(Dataset<long[]> dataset) throws IOException { ColumnMetadata metadata = dataset.toColumnMetadata(); - try (BitSlicedRangeIndexCreator creator = newBitslicedIndexCreator(metadata)) { + try (BitSlicedRangeIndexCreator creator = newBitSlicedIndexCreator(metadata)) { for (long value : dataset.values()) { creator.add(value); } @@ -192,7 +194,9 @@ public class BitSlicedIndexCreatorTest { for (long quantile : dataset.quantiles()) { ImmutableRoaringBitmap reference = dataset.scan(prev, quantile); ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev, quantile); + int resultCount = reader.getNumMatchingDocs(prev, quantile); assertEquals(result, reference); + assertEquals(resultCount, result == null ? 0 : result.getCardinality()); prev = quantile; } ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, Long.MAX_VALUE); @@ -207,7 +211,7 @@ public class BitSlicedIndexCreatorTest { private void testFloat(Dataset<float[]> dataset) throws IOException { ColumnMetadata metadata = dataset.toColumnMetadata(); - try (BitSlicedRangeIndexCreator creator = newBitslicedIndexCreator(metadata)) { + try (BitSlicedRangeIndexCreator creator = newBitSlicedIndexCreator(metadata)) { for (float value : dataset.values()) { creator.add(value); } @@ -220,7 +224,9 @@ public class BitSlicedIndexCreatorTest { for (float quantile : dataset.quantiles()) { ImmutableRoaringBitmap reference = dataset.scan(prev, quantile); ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev, quantile); + int resultCount = reader.getNumMatchingDocs(prev, quantile); assertEquals(result, reference); + assertEquals(resultCount, result == null ? 0 : result.getCardinality()); prev = quantile; } ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, Float.POSITIVE_INFINITY); @@ -235,7 +241,7 @@ public class BitSlicedIndexCreatorTest { private void testDouble(Dataset<double[]> dataset) throws IOException { ColumnMetadata metadata = dataset.toColumnMetadata(); - try (BitSlicedRangeIndexCreator creator = newBitslicedIndexCreator(metadata)) { + try (BitSlicedRangeIndexCreator creator = newBitSlicedIndexCreator(metadata)) { for (double value : dataset.values()) { creator.add(value); } @@ -248,7 +254,9 @@ public class BitSlicedIndexCreatorTest { for (double quantile : dataset.quantiles()) { ImmutableRoaringBitmap reference = dataset.scan(prev, quantile); ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev, quantile); + int resultCount = reader.getNumMatchingDocs(prev, quantile); assertEquals(result, reference); + assertEquals(resultCount, result == null ? 0 : result.getCardinality()); prev = quantile; } ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, Double.POSITIVE_INFINITY); @@ -260,7 +268,7 @@ public class BitSlicedIndexCreatorTest { } } - private static BitSlicedRangeIndexCreator newBitslicedIndexCreator(ColumnMetadata metadata) { + private static BitSlicedRangeIndexCreator newBitSlicedIndexCreator(ColumnMetadata metadata) { return metadata.hasDictionary() ? new BitSlicedRangeIndexCreator(INDEX_DIR, metadata.getFieldSpec(), metadata.getCardinality()) : new BitSlicedRangeIndexCreator(INDEX_DIR, metadata.getFieldSpec(), metadata.getMinValue(), metadata.getMaxValue()); diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/RangeIndexReader.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/RangeIndexReader.java index 3aeeaf9f23..eabeef502a 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/RangeIndexReader.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/reader/RangeIndexReader.java @@ -26,6 +26,52 @@ import javax.annotation.Nullable; * @param <T> */ public interface RangeIndexReader<T> extends Closeable { + + /** + * @return true if the results are exact and don't need refinement by scanning. + * This means {@see getPartiallyMatchingDocIds} will return null. + */ + default boolean isExact() { + return true; + } + + /** + * Returns the number of docs with a value between min and max, both inclusive. + * Doc ids returned by this method must correspond to values which + * satisfy the query. + * @param min the inclusive lower bound. + * @param max the inclusive upper bound. + * @return the matching doc ids. + */ + int getNumMatchingDocs(int min, int max); + + /** + * Returns the number of docs with a value between min and max, both inclusive. + * The count is exact unless {@see getPartiallyMatchingDocIds} returns a non-null value. + * @param min the inclusive lower bound. + * @param max the inclusive upper bound. + * @return the matching doc ids. + */ + int getNumMatchingDocs(long min, long max); + + /** + * Returns the number of docs with a value between min and max, both inclusive. + * The count is exact unless {@see getPartiallyMatchingDocIds} returns a non-null value. + * @param min the inclusive lower bound. + * @param max the inclusive upper bound. + * @return the matching doc ids. + */ + int getNumMatchingDocs(float min, float max); + + /** + * Returns the number of docs with a value between min and max, both inclusive. + * The count is exact unless {@see getPartiallyMatchingDocIds} returns a non-null value. + * @param min the inclusive lower bound. + * @param max the inclusive upper bound. + * @return the matching doc ids. + */ + int getNumMatchingDocs(double min, double max); + /** * Returns doc ids with a value between min and max, both inclusive. * Doc ids returned by this method must correspond to values which --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org