This is an automated email from the ASF dual-hosted git repository. jackie pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 5fc89ce453 Support NOT in StarTree Index (#12988) 5fc89ce453 is described below commit 5fc89ce4530f856756a3ca6357d90deea9365032 Author: Xiaotian (Jackie) Jiang <17555551+jackie-ji...@users.noreply.github.com> AuthorDate: Fri Apr 26 13:06:25 2024 -0700 Support NOT in StarTree Index (#12988) --- .../filter/SortedIndexBasedFilterOperator.java | 6 +- .../BaseDictionaryBasedPredicateEvaluator.java | 55 +++++++-- .../filter/predicate/BasePredicateEvaluator.java | 10 -- .../predicate/EqualsPredicateEvaluatorFactory.java | 16 ++- .../FSTBasedRegexpPredicateEvaluatorFactory.java | 30 ++--- .../predicate/InPredicateEvaluatorFactory.java | 37 +++--- .../NotEqualsPredicateEvaluatorFactory.java | 41 ++----- .../predicate/NotInPredicateEvaluatorFactory.java | 62 ++++------ .../filter/predicate/PredicateEvaluator.java | 23 +--- .../operator/filter/predicate/PredicateUtils.java | 34 ++++++ .../predicate/RangePredicateEvaluatorFactory.java | 96 +++++++++------- .../RegexpLikePredicateEvaluatorFactory.java | 22 +--- .../core/startree/CompositePredicateEvaluator.java | 17 +-- .../apache/pinot/core/startree/StarTreeUtils.java | 125 +++++++++++++++------ .../startree/operator/StarTreeFilterOperator.java | 68 +++++++---- .../pinot/core/startree/v2/BaseStarTreeV2Test.java | 20 +++- .../pinot/perf/BenchmarkScanDocIdIterators.java | 10 -- 17 files changed, 357 insertions(+), 315 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java index d32c68c7e5..09144af9ff 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java @@ -20,7 +20,6 @@ package org.apache.pinot.core.operator.filter; import com.google.common.base.Preconditions; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; import org.apache.pinot.core.common.BlockDocIdSet; @@ -90,10 +89,7 @@ public class SortedIndexBasedFilterOperator extends BaseColumnFilterOperator { return new SortedDocIdSet(Collections.singletonList(docIdRange)); } } else { - // Sort the dictIds in ascending order so that their respective docIdRanges are adjacent if they are adjacent - Arrays.sort(dictIds); - - // Merge adjacent docIdRanges + // Merge adjacent docIdRanges (dictIds are already sorted) List<IntPair> docIdRanges = new ArrayList<>(); IntPair lastDocIdRange = _sortedIndexReader.getDocIds(dictIds[0]); for (int i = 1; i < numDictIds; i++) { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java index 92050c3cef..18d15d3673 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java @@ -18,17 +18,34 @@ */ package org.apache.pinot.core.operator.filter.predicate; +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; import java.math.BigDecimal; import org.apache.pinot.common.request.context.predicate.Predicate; +import org.apache.pinot.segment.spi.index.reader.Dictionary; import org.apache.pinot.spi.data.FieldSpec.DataType; public abstract class BaseDictionaryBasedPredicateEvaluator extends BasePredicateEvaluator { + protected final Dictionary _dictionary; protected boolean _alwaysTrue; protected boolean _alwaysFalse; + protected int[] _matchingDictIds; + protected int[] _nonMatchingDictIds; - protected BaseDictionaryBasedPredicateEvaluator(Predicate predicate) { + protected BaseDictionaryBasedPredicateEvaluator(Predicate predicate, Dictionary dictionary) { super(predicate); + _dictionary = dictionary; + } + + @Override + public final boolean isDictionaryBased() { + return true; + } + + @Override + public DataType getDataType() { + return DataType.INT; } @Override @@ -42,13 +59,33 @@ public abstract class BaseDictionaryBasedPredicateEvaluator extends BasePredicat } @Override - public final boolean isDictionaryBased() { - return true; + public int[] getMatchingDictIds() { + if (_matchingDictIds == null) { + _matchingDictIds = calculateMatchingDictIds(); + } + return _matchingDictIds; } - @Override - public DataType getDataType() { - return DataType.INT; + protected int[] calculateMatchingDictIds() { + IntList matchingDictIds = new IntArrayList(); + int dictionarySize = _dictionary.length(); + for (int dictId = 0; dictId < dictionarySize; dictId++) { + if (applySV(dictId)) { + matchingDictIds.add(dictId); + } + } + return matchingDictIds.toIntArray(); + } + + public int[] getNonMatchingDictIds() { + if (_nonMatchingDictIds == null) { + _nonMatchingDictIds = calculateNonMatchingDictIds(); + } + return _nonMatchingDictIds; + } + + protected int[] calculateNonMatchingDictIds() { + return PredicateUtils.flipDictIds(getMatchingDictIds(), _dictionary.length()); } @Override @@ -106,12 +143,6 @@ public abstract class BaseDictionaryBasedPredicateEvaluator extends BasePredicat throw new UnsupportedOperationException(); } - // NOTE: override it for exclusive predicate - @Override - public int[] getNonMatchingDictIds() { - throw new UnsupportedOperationException(); - } - /** * Apply a single-value entry to the predicate. * diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BasePredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BasePredicateEvaluator.java index 0e04954675..407e619ae3 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BasePredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BasePredicateEvaluator.java @@ -42,14 +42,4 @@ public abstract class BasePredicateEvaluator implements PredicateEvaluator { public final boolean isExclusive() { return getPredicateType().isExclusive(); } - - @Override - public int getNumMatchingDictIds() { - return getMatchingDictIds().length; - } - - @Override - public int getNumNonMatchingDictIds() { - return getNonMatchingDictIds().length; - } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java index 14616b36be..bf99e6c933 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java @@ -62,8 +62,7 @@ public class EqualsPredicateEvaluatorFactory { * @param dataType Data type for the column * @return Raw value based EQ predicate evaluator */ - public static EqRawPredicateEvaluator newRawValueBasedEvaluator(EqPredicate eqPredicate, - DataType dataType) { + public static EqRawPredicateEvaluator newRawValueBasedEvaluator(EqPredicate eqPredicate, DataType dataType) { String value = eqPredicate.getValue(); switch (dataType) { case INT: @@ -92,10 +91,9 @@ public class EqualsPredicateEvaluatorFactory { private static final class DictionaryBasedEqPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator implements IntValue { final int _matchingDictId; - final int[] _matchingDictIds; DictionaryBasedEqPredicateEvaluator(EqPredicate eqPredicate, Dictionary dictionary, DataType dataType) { - super(eqPredicate); + super(eqPredicate, dictionary); String predicateValue = PredicateUtils.getStoredValue(eqPredicate.getValue(), dataType); _matchingDictId = dictionary.indexOf(predicateValue); if (_matchingDictId >= 0) { @@ -109,6 +107,11 @@ public class EqualsPredicateEvaluatorFactory { } } + @Override + protected int[] calculateNonMatchingDictIds() { + return PredicateUtils.getDictIds(_dictionary.length(), _matchingDictId); + } + @Override public int getNumMatchingItems() { return 1; @@ -132,11 +135,6 @@ public class EqualsPredicateEvaluatorFactory { return matches; } - @Override - public int[] getMatchingDictIds() { - return _matchingDictIds; - } - @Override public int getInt() { return _matchingDictId; diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java index b1a0559a92..11dbe7aa99 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java @@ -50,30 +50,29 @@ public class FSTBasedRegexpPredicateEvaluatorFactory { * Matches regexp query using FSTIndexReader. */ private static class FSTBasedRegexpPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { - final Dictionary _dictionary; - final ImmutableRoaringBitmap _dictIds; + final ImmutableRoaringBitmap _matchingDictIdBitmap; public FSTBasedRegexpPredicateEvaluator(RegexpLikePredicate regexpLikePredicate, TextIndexReader fstIndexReader, Dictionary dictionary) { - super(regexpLikePredicate); - _dictionary = dictionary; + super(regexpLikePredicate, dictionary); String searchQuery = RegexpPatternConverterUtils.regexpLikeToLuceneRegExp(regexpLikePredicate.getValue()); - _dictIds = fstIndexReader.getDictIds(searchQuery); - } - - @Override - public boolean isAlwaysFalse() { - return _dictIds.isEmpty(); + _matchingDictIdBitmap = fstIndexReader.getDictIds(searchQuery); + int numMatchingDictIds = _matchingDictIdBitmap.getCardinality(); + if (numMatchingDictIds == 0) { + _alwaysFalse = true; + } else if (dictionary.length() == numMatchingDictIds) { + _alwaysTrue = true; + } } @Override - public boolean isAlwaysTrue() { - return _dictIds.getCardinality() == _dictionary.length(); + protected int[] calculateMatchingDictIds() { + return _matchingDictIdBitmap.toArray(); } @Override public boolean applySV(int dictId) { - return _dictIds.contains(dictId); + return _matchingDictIdBitmap.contains(dictId); } @Override @@ -88,10 +87,5 @@ public class FSTBasedRegexpPredicateEvaluatorFactory { } return matches; } - - @Override - public int[] getMatchingDictIds() { - return _dictIds.toArray(); - } } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java index 9ad0a78014..5ebc9a1a6b 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java @@ -71,8 +71,7 @@ public class InPredicateEvaluatorFactory { * @param dataType Data type for the column * @return Raw value based IN predicate evaluator */ - public static InRawPredicateEvaluator newRawValueBasedEvaluator(InPredicate inPredicate, - DataType dataType) { + public static InRawPredicateEvaluator newRawValueBasedEvaluator(InPredicate inPredicate, DataType dataType) { switch (dataType) { case INT: { int[] intValues = inPredicate.getIntValues(); @@ -157,42 +156,34 @@ public class InPredicateEvaluatorFactory { private static final class DictionaryBasedInPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { final IntSet _matchingDictIdSet; - final int _numMatchingDictIds; - int[] _matchingDictIds; DictionaryBasedInPredicateEvaluator(InPredicate inPredicate, Dictionary dictionary, DataType dataType, @Nullable QueryContext queryContext) { - super(inPredicate); + super(inPredicate, dictionary); _matchingDictIdSet = PredicateUtils.getDictIdSet(inPredicate, dictionary, dataType, queryContext); - _numMatchingDictIds = _matchingDictIdSet.size(); - if (_numMatchingDictIds == 0) { + int numMatchingDictIds = _matchingDictIdSet.size(); + if (numMatchingDictIds == 0) { _alwaysFalse = true; - } else if (dictionary.length() == _numMatchingDictIds) { + } else if (dictionary.length() == numMatchingDictIds) { _alwaysTrue = true; } } @Override - public boolean applySV(int dictId) { - return _matchingDictIdSet.contains(dictId); - } - - @Override - public int getNumMatchingDictIds() { - return _numMatchingDictIds; + protected int[] calculateMatchingDictIds() { + int[] matchingDictIds = _matchingDictIdSet.toIntArray(); + Arrays.sort(matchingDictIds); + return matchingDictIds; } @Override public int getNumMatchingItems() { - return getNumMatchingDictIds(); + return _matchingDictIdSet.size(); } @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - _matchingDictIds = _matchingDictIdSet.toIntArray(); - } - return _matchingDictIds; + public boolean applySV(int dictId) { + return _matchingDictIdSet.contains(dictId); } @Override @@ -477,9 +468,7 @@ public class InPredicateEvaluatorFactory { @Override public <R> R accept(MultiValueVisitor<R> visitor) { - byte[][] bytes = _matchingValues.stream() - .map(ByteArray::getBytes) - .toArray(byte[][]::new); + byte[][] bytes = _matchingValues.stream().map(ByteArray::getBytes).toArray(byte[][]::new); return visitor.visitBytes(bytes); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java index 54ce7df58c..fdcff7c579 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java @@ -58,8 +58,7 @@ public class NotEqualsPredicateEvaluatorFactory { * @param dataType Data type for the column * @return Raw value based NOT_EQ predicate evaluator */ - public static NeqRawPredicateEvaluator newRawValueBasedEvaluator(NotEqPredicate notEqPredicate, - DataType dataType) { + public static NeqRawPredicateEvaluator newRawValueBasedEvaluator(NotEqPredicate notEqPredicate, DataType dataType) { String value = notEqPredicate.getValue(); switch (dataType) { case INT: @@ -87,12 +86,9 @@ public class NotEqualsPredicateEvaluatorFactory { private static final class DictionaryBasedNeqPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { final int _nonMatchingDictId; - final int[] _nonMatchingDictIds; - final Dictionary _dictionary; - int[] _matchingDictIds; DictionaryBasedNeqPredicateEvaluator(NotEqPredicate notEqPredicate, Dictionary dictionary, DataType dataType) { - super(notEqPredicate); + super(notEqPredicate, dictionary); String predicateValue = PredicateUtils.getStoredValue(notEqPredicate.getValue(), dataType); _nonMatchingDictId = dictionary.indexOf(predicateValue); if (_nonMatchingDictId >= 0) { @@ -104,7 +100,11 @@ public class NotEqualsPredicateEvaluatorFactory { _nonMatchingDictIds = new int[0]; _alwaysTrue = true; } - _dictionary = dictionary; + } + + @Override + protected int[] calculateMatchingDictIds() { + return PredicateUtils.getDictIds(_dictionary.length(), _nonMatchingDictId); } @Override @@ -129,33 +129,6 @@ public class NotEqualsPredicateEvaluatorFactory { } return matches; } - - @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - int dictionarySize = _dictionary.length(); - if (_nonMatchingDictId >= 0) { - _matchingDictIds = new int[dictionarySize - 1]; - int index = 0; - for (int dictId = 0; dictId < dictionarySize; dictId++) { - if (dictId != _nonMatchingDictId) { - _matchingDictIds[index++] = dictId; - } - } - } else { - _matchingDictIds = new int[dictionarySize]; - for (int dictId = 0; dictId < dictionarySize; dictId++) { - _matchingDictIds[dictId] = dictId; - } - } - } - return _matchingDictIds; - } - - @Override - public int[] getNonMatchingDictIds() { - return _nonMatchingDictIds; - } } public static abstract class NeqRawPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java index 5fe7b51d35..4682225aa7 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java @@ -71,8 +71,7 @@ public class NotInPredicateEvaluatorFactory { * @param dataType Data type for the column * @return Raw value based NOT_IN predicate evaluator */ - public static NotInRawPredicateEvaluator newRawValueBasedEvaluator(NotInPredicate notInPredicate, - DataType dataType) { + public static NotInRawPredicateEvaluator newRawValueBasedEvaluator(NotInPredicate notInPredicate, DataType dataType) { switch (dataType) { case INT: { int[] intValues = notInPredicate.getIntValues(); @@ -157,27 +156,34 @@ public class NotInPredicateEvaluatorFactory { public static final class DictionaryBasedNotInPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { final IntSet _nonMatchingDictIdSet; - final int _numNonMatchingDictIds; - final Dictionary _dictionary; - int[] _matchingDictIds; - int[] _nonMatchingDictIds; DictionaryBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Dictionary dictionary, DataType dataType, @Nullable QueryContext queryContext) { - super(notInPredicate); + super(notInPredicate, dictionary); _nonMatchingDictIdSet = PredicateUtils.getDictIdSet(notInPredicate, dictionary, dataType, queryContext); - _numNonMatchingDictIds = _nonMatchingDictIdSet.size(); - if (_numNonMatchingDictIds == 0) { + int numNonMatchingDictIds = _nonMatchingDictIdSet.size(); + if (numNonMatchingDictIds == 0) { _alwaysTrue = true; - } else if (dictionary.length() == _numNonMatchingDictIds) { + } else if (dictionary.length() == numNonMatchingDictIds) { _alwaysFalse = true; } - _dictionary = dictionary; + } + + @Override + protected int[] calculateMatchingDictIds() { + return PredicateUtils.flipDictIds(getNonMatchingDictIds(), _dictionary.length()); + } + + @Override + protected int[] calculateNonMatchingDictIds() { + int[] nonMatchingDictIds = _nonMatchingDictIdSet.toIntArray(); + Arrays.sort(nonMatchingDictIds); + return nonMatchingDictIds; } @Override public int getNumMatchingItems() { - return -_numNonMatchingDictIds; + return -_nonMatchingDictIdSet.size(); } @Override @@ -197,34 +203,6 @@ public class NotInPredicateEvaluatorFactory { } return matches; } - - @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - int dictionarySize = _dictionary.length(); - _matchingDictIds = new int[dictionarySize - _numNonMatchingDictIds]; - int index = 0; - for (int dictId = 0; dictId < dictionarySize; dictId++) { - if (!_nonMatchingDictIdSet.contains(dictId)) { - _matchingDictIds[index++] = dictId; - } - } - } - return _matchingDictIds; - } - - @Override - public int getNumNonMatchingDictIds() { - return _numNonMatchingDictIds; - } - - @Override - public int[] getNonMatchingDictIds() { - if (_nonMatchingDictIds == null) { - _nonMatchingDictIds = _nonMatchingDictIdSet.toIntArray(); - } - return _nonMatchingDictIds; - } } public static abstract class NotInRawPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { @@ -491,9 +469,7 @@ public class NotInPredicateEvaluatorFactory { @Override public <R> R accept(MultiValueVisitor<R> visitor) { - byte[][] bytes = _nonMatchingValues.stream() - .map(ByteArray::getBytes) - .toArray(byte[][]::new); + byte[][] bytes = _nonMatchingValues.stream().map(ByteArray::getBytes).toArray(byte[][]::new); return visitor.visitBytes(bytes); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java index 889e287107..09b420f48f 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java @@ -102,35 +102,24 @@ public interface PredicateEvaluator { boolean applyMV(int[] values, int length); /** - * APIs for dictionary based predicate evaluator - */ - - /** - * return the number of matching items specified by predicate - * negative number indicates exclusive (not eq, not in) match - * return {@code Integer.MIN_VALUE} for not applicable + * Get the number of matching items. Negative number indicates exclusive (e.g. NOT_EQ, NOT_IN) match. Returns + * {@code Integer.MIN_VALUE} if not applicable. */ default int getNumMatchingItems() { return Integer.MIN_VALUE; - }; + } /** - * Get the number of matching dictionary ids. + * APIs for dictionary based predicate evaluator */ - int getNumMatchingDictIds(); /** - * Get the matching dictionary ids. + * Get the matching dictionary ids. The returned ids should be sorted. */ int[] getMatchingDictIds(); /** - * Get the number of non-matching dictionary ids. - */ - int getNumNonMatchingDictIds(); - - /** - * Get the non-matching dictionary ids. + * Get the non-matching dictionary ids. The returned ids should be sorted. */ int[] getNonMatchingDictIds(); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java index 9135a85f78..c7b93cf086 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java @@ -190,4 +190,38 @@ public class PredicateUtils { } return dictIdSet; } + + public static int[] flipDictIds(int[] dictIds, int length) { + int numDictIds = dictIds.length; + int[] flippedDictIds = new int[length - numDictIds]; + int flippedDictIdsIndex = 0; + int dictIdsIndex = 0; + for (int dictId = 0; dictId < length; dictId++) { + if (dictIdsIndex < numDictIds && dictId == dictIds[dictIdsIndex]) { + dictIdsIndex++; + } else { + flippedDictIds[flippedDictIdsIndex++] = dictId; + } + } + return flippedDictIds; + } + + public static int[] getDictIds(int length, int excludeId) { + int[] dictIds; + if (excludeId >= 0) { + dictIds = new int[length - 1]; + int index = 0; + for (int dictId = 0; dictId < length; dictId++) { + if (dictId != excludeId) { + dictIds[index++] = dictId; + } + } + } else { + dictIds = new int[length]; + for (int dictId = 0; dictId < length; dictId++) { + dictIds[dictId] = dictId; + } + } + return dictIds; + } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java index ca8e1f126a..e9bd3b4b0a 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java @@ -122,11 +122,10 @@ public class RangePredicateEvaluatorFactory { // Exclusive final int _endDictId; final int _numMatchingDictIds; - int[] _matchingDictIds; SortedDictionaryBasedRangePredicateEvaluator(RangePredicate rangePredicate, Dictionary dictionary, DataType dataType) { - super(rangePredicate); + super(rangePredicate, dictionary); String lowerBound = rangePredicate.getLowerBound(); String upperBound = rangePredicate.getUpperBound(); boolean lowerInclusive = rangePredicate.isLowerInclusive(); @@ -161,8 +160,8 @@ public class RangePredicateEvaluatorFactory { } } - _numMatchingDictIds = _endDictId - _startDictId; - if (_numMatchingDictIds <= 0) { + _numMatchingDictIds = Integer.max(_endDictId - _startDictId, 0); + if (_numMatchingDictIds == 0) { _alwaysFalse = true; } else if (dictionary.length() == _numMatchingDictIds) { _alwaysTrue = true; @@ -178,46 +177,61 @@ public class RangePredicateEvaluatorFactory { } @Override - public boolean applySV(int dictId) { - return _startDictId <= dictId && _endDictId > dictId; + protected int[] calculateMatchingDictIds() { + if (_numMatchingDictIds == 0) { + return new int[0]; + } else { + int[] matchingDictIds = new int[_numMatchingDictIds]; + for (int i = 0; i < _numMatchingDictIds; i++) { + matchingDictIds[i] = _startDictId + i; + } + return matchingDictIds; + } } @Override - public int applySV(int limit, int[] docIds, int[] dictIds) { - // reimplemented here to ensure applySV can be inlined - int matches = 0; - for (int i = 0; i < limit; i++) { - int dictId = dictIds[i]; - if (applySV(dictId)) { - docIds[matches++] = docIds[i]; + protected int[] calculateNonMatchingDictIds() { + int dictionarySize = _dictionary.length(); + if (_numMatchingDictIds == 0) { + int[] nonMatchingDictIds = new int[dictionarySize]; + for (int i = 0; i < dictionarySize; i++) { + nonMatchingDictIds[i] = i; + } + return nonMatchingDictIds; + } else { + int[] nonMatchingDictIds = new int[dictionarySize - _numMatchingDictIds]; + int index = 0; + for (int i = 0; i < _startDictId; i++) { + nonMatchingDictIds[index++] = i; + } + for (int i = _endDictId; i < dictionarySize; i++) { + nonMatchingDictIds[index++] = i; } + return nonMatchingDictIds; } - return matches; } @Override - public int getNumMatchingDictIds() { + public int getNumMatchingItems() { return _numMatchingDictIds; } @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - if (_numMatchingDictIds <= 0) { - _matchingDictIds = new int[0]; - } else { - _matchingDictIds = new int[_numMatchingDictIds]; - for (int i = 0; i < _numMatchingDictIds; i++) { - _matchingDictIds[i] = _startDictId + i; - } - } - } - return _matchingDictIds; + public boolean applySV(int dictId) { + return _startDictId <= dictId && _endDictId > dictId; } @Override - public int getNumMatchingItems() { - return Math.max(_numMatchingDictIds, 0); + public int applySV(int limit, int[] docIds, int[] dictIds) { + // reimplemented here to ensure applySV can be inlined + int matches = 0; + for (int i = 0; i < limit; i++) { + int dictId = dictIds[i]; + if (applySV(dictId)) { + docIds[matches++] = docIds[i]; + } + } + return matches; } @Override @@ -238,15 +252,13 @@ public class RangePredicateEvaluatorFactory { // TODO: Tune this threshold private static final int DICT_ID_SET_BASED_CARDINALITY_THRESHOLD = 1000; - final Dictionary _dictionary; final boolean _dictIdSetBased; final IntSet _matchingDictIdSet; final BaseRawValueBasedPredicateEvaluator _rawValueBasedEvaluator; UnsortedDictionaryBasedRangePredicateEvaluator(RangePredicate rangePredicate, Dictionary dictionary, DataType dataType) { - super(rangePredicate); - _dictionary = dictionary; + super(rangePredicate, dictionary); int cardinality = dictionary.length(); if (cardinality < DICT_ID_SET_BASED_CARDINALITY_THRESHOLD) { _dictIdSetBased = true; @@ -274,6 +286,16 @@ public class RangePredicateEvaluatorFactory { } } + @Override + public int[] getMatchingDictIds() { + throw new UnsupportedOperationException(); + } + + @Override + public int getNumMatchingItems() { + return _matchingDictIdSet != null ? _matchingDictIdSet.size() : Integer.MIN_VALUE; + } + @Override public boolean applySV(int dictId) { if (_dictIdSetBased) { @@ -299,16 +321,6 @@ public class RangePredicateEvaluatorFactory { } } } - - @Override - public int getNumMatchingItems() { - return _matchingDictIdSet == null ? super.getNumMatchingItems() : _matchingDictIdSet.size(); - } - - @Override - public int[] getMatchingDictIds() { - throw new UnsupportedOperationException(); - } } private static final class IntRawValueBasedRangePredicateEvaluator extends BaseRawValueBasedPredicateEvaluator diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java index 82477c3560..09ffdb62f0 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java @@ -19,8 +19,6 @@ package org.apache.pinot.core.operator.filter.predicate; import com.google.common.base.Preconditions; -import it.unimi.dsi.fastutil.ints.IntArrayList; -import it.unimi.dsi.fastutil.ints.IntList; import java.util.regex.Matcher; import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate; import org.apache.pinot.segment.spi.index.reader.Dictionary; @@ -66,12 +64,9 @@ public class RegexpLikePredicateEvaluatorFactory { // Reuse matcher to avoid excessive allocation. This is safe to do because the evaluator is always used // within the scope of a single thread. final Matcher _matcher; - final Dictionary _dictionary; - int[] _matchingDictIds; public DictionaryBasedRegexpLikePredicateEvaluator(RegexpLikePredicate regexpLikePredicate, Dictionary dictionary) { - super(regexpLikePredicate); - _dictionary = dictionary; + super(regexpLikePredicate, dictionary); _matcher = regexpLikePredicate.getPattern().matcher(""); } @@ -92,21 +87,6 @@ public class RegexpLikePredicateEvaluatorFactory { } return matches; } - - @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - IntList matchingDictIds = new IntArrayList(); - int dictionarySize = _dictionary.length(); - for (int dictId = 0; dictId < dictionarySize; dictId++) { - if (applySV(dictId)) { - matchingDictIds.add(dictId); - } - } - _matchingDictIds = matchingDictIds.toIntArray(); - } - return _matchingDictIds; - } } private static final class RawValueBasedRegexpLikePredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/CompositePredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/CompositePredicateEvaluator.java index 9424bc0cdb..1725364aee 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/startree/CompositePredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/CompositePredicateEvaluator.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.core.startree; +import it.unimi.dsi.fastutil.objects.ObjectBooleanPair; import java.util.List; import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator; @@ -26,19 +27,19 @@ import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator; * Represents a composite predicate. * * A composite predicate evaluator represents a single predicate evaluator or multiple predicate evaluators conjoined - * with OR. - * Consider the given predicate: (d1 > 10 OR d1 < 50). A composite predicate will represent two predicates -- (d1 > 10) - * and (d1 < 50) and represent that they are related by the operator OR. + * with OR. Each predicate evaluator is associated with a boolean value indicating whether the predicate is negated. + * Consider the given predicate: (d1 > 10 OR NOT d1 > 50). A composite predicate will represent two predicates: + * (d1 > 10) and NOT(d1 > 50) and represent that they are related by the operator OR. */ public class CompositePredicateEvaluator { - private final List<PredicateEvaluator> _predicateEvaluators; + private final List<ObjectBooleanPair<PredicateEvaluator>> _predicateEvaluators; - public CompositePredicateEvaluator(List<PredicateEvaluator> predicateEvaluators) { + public CompositePredicateEvaluator(List<ObjectBooleanPair<PredicateEvaluator>> predicateEvaluators) { assert !predicateEvaluators.isEmpty(); _predicateEvaluators = predicateEvaluators; } - public List<PredicateEvaluator> getPredicateEvaluators() { + public List<ObjectBooleanPair<PredicateEvaluator>> getPredicateEvaluators() { return _predicateEvaluators; } @@ -47,8 +48,8 @@ public class CompositePredicateEvaluator { * predicate evaluator, {@code false} otherwise. */ public boolean apply(int dictId) { - for (PredicateEvaluator predicateEvaluator : _predicateEvaluators) { - if (predicateEvaluator.applySV(dictId)) { + for (ObjectBooleanPair<PredicateEvaluator> predicateEvaluator : _predicateEvaluators) { + if (predicateEvaluator.left().applySV(dictId) != predicateEvaluator.rightBoolean()) { return true; } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java index f79070ae9f..68bd26e780 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.core.startree; +import it.unimi.dsi.fastutil.objects.ObjectBooleanPair; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; @@ -75,13 +76,13 @@ public class StarTreeUtils { } /** - * Extracts a map from the column to a list of {@link PredicateEvaluator}s for it. Returns {@code null} if the filter - * cannot be solved by the star-tree. + * Extracts a map from the column to a list of {@link CompositePredicateEvaluator}s for it. Returns {@code null} if + * the filter cannot be solved by the star-tree. * * A predicate can be simple (d1 > 10) or composite (d1 > 10 AND d2 < 50) or multi levelled - * (d1 > 50 AND (d2 > 10 OR d2 < 35)). + * (d1 > 50 AND (d2 > 10 OR NOT d2 > 35)). * This method represents a list of CompositePredicates per dimension. For each dimension, all CompositePredicates in - * the list are implicitly ANDed together. Any OR predicates are nested within a CompositePredicate. + * the list are implicitly ANDed together. Any OR and NOT predicates are nested within a CompositePredicate. * * A map from predicates to their evaluators is passed in to accelerate the computation. */ @@ -102,21 +103,50 @@ public class StarTreeUtils { queue.addAll(filterNode.getChildren()); break; case OR: - Pair<String, List<PredicateEvaluator>> pair = + Pair<String, CompositePredicateEvaluator> pair = isOrClauseValidForStarTree(indexSegment, filterNode, predicateEvaluatorMapping); if (pair == null) { return null; } - List<PredicateEvaluator> predicateEvaluators = pair.getRight(); - // NOTE: Empty list means always true - if (!predicateEvaluators.isEmpty()) { - predicateEvaluatorsMap.computeIfAbsent(pair.getLeft(), k -> new ArrayList<>()) - .add(new CompositePredicateEvaluator(predicateEvaluators)); + // NOTE: Null identifier means always true + if (pair.getLeft() != null) { + predicateEvaluatorsMap.computeIfAbsent(pair.getLeft(), k -> new ArrayList<>()).add(pair.getRight()); } break; case NOT: - // TODO: Support NOT in star-tree - return null; + boolean negated = true; + FilterContext negatedChild = filterNode.getChildren().get(0); + while (true) { + FilterContext.Type type = negatedChild.getType(); + if (type == FilterContext.Type.PREDICATE) { + Predicate predicate = negatedChild.getPredicate(); + PredicateEvaluator predicateEvaluator = + getPredicateEvaluator(indexSegment, predicate, predicateEvaluatorMapping); + // Do not use star-tree when the predicate cannot be solved with star-tree + if (predicateEvaluator == null) { + return null; + } + // Do not use star-tree when the predicate is always false + if ((predicateEvaluator.isAlwaysTrue() && negated) || (predicateEvaluator.isAlwaysFalse() && !negated)) { + return null; + } + // Skip adding always true predicate + if ((predicateEvaluator.isAlwaysTrue() && !negated) || (predicateEvaluator.isAlwaysFalse() && negated)) { + break; + } + predicateEvaluatorsMap.computeIfAbsent(predicate.getLhs().getIdentifier(), k -> new ArrayList<>()) + .add(new CompositePredicateEvaluator(List.of(ObjectBooleanPair.of(predicateEvaluator, negated)))); + break; + } + if (type == FilterContext.Type.NOT) { + negated = !negated; + negatedChild = negatedChild.getChildren().get(0); + continue; + } + // Do not allow nested AND/OR under NOT + return null; + } + break; case PREDICATE: Predicate predicate = filterNode.getPredicate(); PredicateEvaluator predicateEvaluator = @@ -127,7 +157,7 @@ public class StarTreeUtils { } if (!predicateEvaluator.isAlwaysTrue()) { predicateEvaluatorsMap.computeIfAbsent(predicate.getLhs().getIdentifier(), k -> new ArrayList<>()) - .add(new CompositePredicateEvaluator(Collections.singletonList(predicateEvaluator))); + .add(new CompositePredicateEvaluator(List.of(ObjectBooleanPair.of(predicateEvaluator, false)))); } break; default: @@ -177,70 +207,91 @@ public class StarTreeUtils { * StarTree supports OR predicates on a single dimension only (d1 < 10 OR d1 > 50). * * @return The pair of single identifier and predicate evaluators applied to it if true; {@code null} if the OR clause - * cannot be solved with star-tree; empty predicate evaluator list if the OR clause always evaluates to true. + * cannot be solved with star-tree; a pair of nulls if the OR clause always evaluates to true. */ @Nullable - private static Pair<String, List<PredicateEvaluator>> isOrClauseValidForStarTree(IndexSegment indexSegment, + private static Pair<String, CompositePredicateEvaluator> isOrClauseValidForStarTree(IndexSegment indexSegment, FilterContext filter, List<Pair<Predicate, PredicateEvaluator>> predicateEvaluatorMapping) { assert filter.getType() == FilterContext.Type.OR; - List<Predicate> predicates = new ArrayList<>(); + List<ObjectBooleanPair<Predicate>> predicates = new ArrayList<>(); if (!extractOrClausePredicates(filter, predicates)) { return null; } String identifier = null; - List<PredicateEvaluator> predicateEvaluators = new ArrayList<>(); - for (Predicate predicate : predicates) { - PredicateEvaluator predicateEvaluator = getPredicateEvaluator(indexSegment, predicate, predicateEvaluatorMapping); + List<ObjectBooleanPair<PredicateEvaluator>> predicateEvaluators = new ArrayList<>(); + for (ObjectBooleanPair<Predicate> predicate : predicates) { + PredicateEvaluator predicateEvaluator = + getPredicateEvaluator(indexSegment, predicate.left(), predicateEvaluatorMapping); if (predicateEvaluator == null) { // The predicate cannot be solved with star-tree return null; } - if (predicateEvaluator.isAlwaysTrue()) { - // Use empty predicate evaluators to represent always true - return Pair.of(null, Collections.emptyList()); + boolean negated = predicate.rightBoolean(); + // Use a pair of null values to represent always true + if ((predicateEvaluator.isAlwaysTrue() && !negated) || (predicateEvaluator.isAlwaysFalse() && negated)) { + return Pair.of(null, null); } - if (!predicateEvaluator.isAlwaysFalse()) { - String predicateIdentifier = predicate.getLhs().getIdentifier(); - if (identifier == null) { - identifier = predicateIdentifier; - } else { - if (!identifier.equals(predicateIdentifier)) { - // The predicates are applied to multiple columns - return null; - } + // Skip the always false predicate + if ((predicateEvaluator.isAlwaysTrue() && negated) || (predicateEvaluator.isAlwaysFalse() && !negated)) { + continue; + } + String predicateIdentifier = predicate.left().getLhs().getIdentifier(); + if (identifier == null) { + identifier = predicateIdentifier; + } else { + if (!identifier.equals(predicateIdentifier)) { + // The predicates are applied to multiple columns + return null; } - predicateEvaluators.add(predicateEvaluator); } + predicateEvaluators.add(ObjectBooleanPair.of(predicateEvaluator, negated)); } // When all predicates are always false, do not use star-tree if (predicateEvaluators.isEmpty()) { return null; } - return Pair.of(identifier, predicateEvaluators); + return Pair.of(identifier, new CompositePredicateEvaluator(predicateEvaluators)); } /** * Extracts the predicates under the given OR clause, returns {@code false} if there is nested AND or NOT under OR * clause. - * TODO: Support NOT in star-tree */ - private static boolean extractOrClausePredicates(FilterContext filter, List<Predicate> predicates) { + private static boolean extractOrClausePredicates(FilterContext filter, + List<ObjectBooleanPair<Predicate>> predicates) { assert filter.getType() == FilterContext.Type.OR; for (FilterContext child : filter.getChildren()) { switch (child.getType()) { case AND: - case NOT: return false; case OR: if (!extractOrClausePredicates(child, predicates)) { return false; } break; + case NOT: + boolean negated = true; + FilterContext negatedChild = child.getChildren().get(0); + while (true) { + FilterContext.Type type = negatedChild.getType(); + if (type == FilterContext.Type.PREDICATE) { + predicates.add(ObjectBooleanPair.of(negatedChild.getPredicate(), negated)); + break; + } + if (type == FilterContext.Type.NOT) { + negated = !negated; + negatedChild = negatedChild.getChildren().get(0); + continue; + } + // Do not allow nested AND/OR under NOT + return false; + } + break; case PREDICATE: - predicates.add(child.getPredicate()); + predicates.add(ObjectBooleanPair.of(child.getPredicate(), false)); break; default: throw new IllegalStateException(); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java index 583dad5e0a..107390fecd 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java @@ -18,9 +18,11 @@ */ package org.apache.pinot.core.startree.operator; +import it.unimi.dsi.fastutil.ints.IntImmutableList; import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.ints.IntSet; +import it.unimi.dsi.fastutil.objects.ObjectBooleanPair; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; @@ -175,19 +177,17 @@ public class StarTreeFilterOperator extends BaseFilterOperator { _predicateEvaluatorsMap.get(remainingPredicateColumn); DataSource dataSource = _starTreeV2.getDataSource(remainingPredicateColumn); for (CompositePredicateEvaluator compositePredicateEvaluator : compositePredicateEvaluators) { - List<PredicateEvaluator> predicateEvaluators = compositePredicateEvaluator.getPredicateEvaluators(); + List<ObjectBooleanPair<PredicateEvaluator>> predicateEvaluators = + compositePredicateEvaluator.getPredicateEvaluators(); int numPredicateEvaluators = predicateEvaluators.size(); if (numPredicateEvaluators == 1) { // Single predicate evaluator - childFilterOperators.add( - FilterOperatorUtils.getLeafFilterOperator(_queryContext, predicateEvaluators.get(0), dataSource, - numDocs)); + childFilterOperators.add(getFilterOperator(predicateEvaluators.get(0), dataSource, numDocs)); } else { // Predicate evaluators conjoined with OR List<BaseFilterOperator> orChildFilterOperators = new ArrayList<>(numPredicateEvaluators); - for (PredicateEvaluator childPredicateEvaluator : predicateEvaluators) { - orChildFilterOperators.add( - FilterOperatorUtils.getLeafFilterOperator(_queryContext, childPredicateEvaluator, dataSource, numDocs)); + for (ObjectBooleanPair<PredicateEvaluator> childPredicateEvaluator : predicateEvaluators) { + orChildFilterOperators.add(getFilterOperator(childPredicateEvaluator, dataSource, numDocs)); } childFilterOperators.add( FilterOperatorUtils.getOrFilterOperator(_queryContext, orChildFilterOperators, numDocs)); @@ -198,6 +198,17 @@ public class StarTreeFilterOperator extends BaseFilterOperator { return FilterOperatorUtils.getAndFilterOperator(_queryContext, childFilterOperators, numDocs); } + private BaseFilterOperator getFilterOperator(ObjectBooleanPair<PredicateEvaluator> predicateEvaluator, + DataSource dataSource, int numDocs) { + BaseFilterOperator leafFilterOperator = + FilterOperatorUtils.getLeafFilterOperator(_queryContext, predicateEvaluator.left(), dataSource, numDocs); + if (predicateEvaluator.rightBoolean()) { + return FilterOperatorUtils.getNotFilterOperator(_queryContext, leafFilterOperator, numDocs); + } else { + return leafFilterOperator; + } + } + /** * Helper method to traverse the star tree, get matching documents and keep track of all the predicate columns that * are not matched. Returns {@code null} if no matching dictionary id found for a column (i.e. the result for the @@ -386,24 +397,28 @@ public class StarTreeFilterOperator extends BaseFilterOperator { } int getPriority(CompositePredicateEvaluator compositePredicateEvaluator) { - List<PredicateEvaluator> predicateEvaluators = compositePredicateEvaluator.getPredicateEvaluators(); + List<ObjectBooleanPair<PredicateEvaluator>> predicateEvaluators = + compositePredicateEvaluator.getPredicateEvaluators(); if (predicateEvaluators.size() == 1) { - switch (predicateEvaluators.get(0).getPredicateType()) { + ObjectBooleanPair<PredicateEvaluator> predicateEvaluator = predicateEvaluators.get(0); + boolean negated = predicateEvaluator.rightBoolean(); + switch (predicateEvaluator.left().getPredicateType()) { case EQ: - return 1; + return negated ? 5 : 1; case IN: - return 2; + return negated ? 4 : 2; case RANGE: return 3; - case NOT_EQ: case NOT_IN: - return 4; + return negated ? 2 : 4; + case NOT_EQ: + return negated ? 1 : 5; default: - throw new UnsupportedOperationException(); + throw new IllegalStateException(); } } else { // Process OR at last - return 5; + return 6; } } }); @@ -433,12 +448,25 @@ public class StarTreeFilterOperator extends BaseFilterOperator { * Returns the matching dictionary ids for the given composite predicate evaluator. */ private IntSet getMatchingDictIds(CompositePredicateEvaluator compositePredicateEvaluator) { - IntSet matchingDictIds = new IntOpenHashSet(); - for (PredicateEvaluator predicateEvaluator : compositePredicateEvaluator.getPredicateEvaluators()) { - for (int matchingDictId : predicateEvaluator.getMatchingDictIds()) { - matchingDictIds.add(matchingDictId); + List<ObjectBooleanPair<PredicateEvaluator>> predicateEvaluators = + compositePredicateEvaluator.getPredicateEvaluators(); + if (predicateEvaluators.size() == 1) { + ObjectBooleanPair<PredicateEvaluator> predicateEvaluator = predicateEvaluators.get(0); + if (predicateEvaluator.rightBoolean()) { + return new IntOpenHashSet(predicateEvaluator.left().getNonMatchingDictIds()); + } else { + return new IntOpenHashSet(predicateEvaluator.left().getMatchingDictIds()); } + } else { + IntSet matchingDictIds = new IntOpenHashSet(); + for (ObjectBooleanPair<PredicateEvaluator> predicateEvaluator : predicateEvaluators) { + if (predicateEvaluator.rightBoolean()) { + matchingDictIds.addAll(new IntImmutableList(predicateEvaluator.left().getNonMatchingDictIds())); + } else { + matchingDictIds.addAll(new IntImmutableList(predicateEvaluator.left().getMatchingDictIds())); + } + } + return matchingDictIds; } - return matchingDictIds; } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java index d4e6d3da46..d323f6d550 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java @@ -103,20 +103,25 @@ abstract class BaseStarTreeV2Test<R, A> { private static final String QUERY_FILTER_AND = " WHERE d1__COLUMN_NAME = 0 AND __d2 < 10"; // StarTree supports OR predicates only on a single dimension private static final String QUERY_FILTER_OR = " WHERE d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50"; + private static final String QUERY_FILTER_NOT = " WHERE NOT d1__COLUMN_NAME > 10"; + private static final String QUERY_FILTER_AND_NOT = " WHERE d1__COLUMN_NAME > 10 AND NOT __d2 < 10"; + private static final String QUERY_FILTER_OR_NOT = " WHERE d1__COLUMN_NAME > 50 OR NOT d1__COLUMN_NAME > 10"; + private static final String QUERY_NOT_NOT = " WHERE NOT NOT d1__COLUMN_NAME > 10"; private static final String QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS = - " WHERE __d2 < 95 AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; + " WHERE __d2 < 95 AND (NOT d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME > 50)"; private static final String QUERY_FILTER_COMPLEX_AND_MULTIPLE_DIMENSIONS_THREE_PREDICATES = - " WHERE __d2 < 95 AND __d2 > 25 AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; + " WHERE __d2 < 95 AND NOT __d2 < 25 AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; private static final String QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS_THREE_PREDICATES = " WHERE (__d2 > 95 OR __d2 < 25) AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; private static final String QUERY_FILTER_COMPLEX_OR_SINGLE_DIMENSION = - " WHERE d1__COLUMN_NAME = 95 AND (d1__COLUMN_NAME > 90 OR d1__COLUMN_NAME < 100)"; + " WHERE NOT d1__COLUMN_NAME = 95 AND (d1__COLUMN_NAME > 90 OR d1__COLUMN_NAME < 100)"; // Unsupported filters private static final String QUERY_FILTER_OR_MULTIPLE_DIMENSIONS = " WHERE d1__COLUMN_NAME > 10 OR __d2 < 50"; private static final String QUERY_FILTER_OR_ON_AND = " WHERE (d1__COLUMN_NAME > 10 AND d1__COLUMN_NAME < 50) OR d1__COLUMN_NAME < 50"; - private static final String QUERY_FILTER_OR_ON_NOT = " WHERE (NOT d1__COLUMN_NAME > 10) OR d1__COLUMN_NAME < 50"; + private static final String QUERY_FILTER_NOT_ON_AND = " WHERE NOT (d1__COLUMN_NAME > 10 AND d1__COLUMN_NAME < 50)"; + private static final String QUERY_FILTER_NOT_ON_OR = " WHERE NOT (d1__COLUMN_NAME < 10 OR d1__COLUMN_NAME > 50)"; // Always false filters private static final String QUERY_FILTER_ALWAYS_FALSE = " WHERE d1__COLUMN_NAME > 100"; private static final String QUERY_FILTER_OR_ALWAYS_FALSE = " WHERE d1__COLUMN_NAME > 100 OR d1__COLUMN_NAME < 0"; @@ -199,7 +204,8 @@ abstract class BaseStarTreeV2Test<R, A> { String query = String.format("SELECT %s FROM %s", _aggregation, TABLE_NAME); testUnsupportedFilter(query + QUERY_FILTER_OR_MULTIPLE_DIMENSIONS); testUnsupportedFilter(query + QUERY_FILTER_OR_ON_AND); - testUnsupportedFilter(query + QUERY_FILTER_OR_ON_NOT); + testUnsupportedFilter(query + QUERY_FILTER_NOT_ON_AND); + testUnsupportedFilter(query + QUERY_FILTER_NOT_ON_OR); testUnsupportedFilter(query + QUERY_FILTER_ALWAYS_FALSE); testUnsupportedFilter(query + QUERY_FILTER_OR_ALWAYS_FALSE); } @@ -213,6 +219,10 @@ abstract class BaseStarTreeV2Test<R, A> { testQuery(query); testQuery(query + QUERY_FILTER_AND); testQuery(query + QUERY_FILTER_OR); + testQuery(query + QUERY_FILTER_NOT); + testQuery(query + QUERY_FILTER_AND_NOT); + testQuery(query + QUERY_FILTER_OR_NOT); + testQuery(query + QUERY_NOT_NOT); testQuery(query + QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS); testQuery(query + QUERY_FILTER_COMPLEX_AND_MULTIPLE_DIMENSIONS_THREE_PREDICATES); testQuery(query + QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS_THREE_PREDICATES); diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkScanDocIdIterators.java b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkScanDocIdIterators.java index 718d99e9ba..9669b82d91 100644 --- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkScanDocIdIterators.java +++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkScanDocIdIterators.java @@ -179,21 +179,11 @@ public class BenchmarkScanDocIdIterators { return false; } - @Override - public int getNumMatchingDictIds() { - return 0; - } - @Override public int[] getMatchingDictIds() { return new int[0]; } - @Override - public int getNumNonMatchingDictIds() { - return 0; - } - @Override public int[] getNonMatchingDictIds() { return new int[0]; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org