This is an automated email from the ASF dual-hosted git repository. richardstartin pushed a commit to branch propagate-inversion in repository https://gitbox.apache.org/repos/asf/pinot.git
commit 5dbc01fb000c0f05e9e589da1db75ee3312ff650 Author: Richard Startin <richardstar...@apache.org> AuthorDate: Wed Jan 4 09:00:31 2023 +0000 propagate inverted status with BitmapDocIdIterator to avoid expensive flipping of bitmaps --- .../operator/dociditerators/AndDocIdIterator.java | 2 +- .../dociditerators/BitmapBasedDocIdIterator.java | 7 ++ .../InvertedBitmapDocIdIterator.java | 81 +++++++++++++++++++++ .../dociditerators/ScanBasedDocIdIterator.java | 8 +++ .../pinot/core/operator/docidsets/AndDocIdSet.java | 58 ++++++++++++--- .../core/operator/docidsets/BitmapDocIdSet.java | 14 +++- .../pinot/core/operator/docidsets/NotDocIdSet.java | 1 + .../pinot/core/operator/docidsets/OrDocIdSet.java | 6 +- .../core/operator/filter/AndFilterOperator.java | 11 +-- .../operator/filter/BitmapBasedFilterOperator.java | 23 +----- .../operator/filter/CombinedFilterOperator.java | 7 +- .../core/operator/filter/FilterOperatorUtils.java | 2 +- .../pinot/core/plan/AggregationPlanNode.java | 3 +- .../dociditerators/AndDocIdIteratorTest.java | 54 ++++++++++++-- .../operator/filter/AndFilterOperatorTest.java | 84 ++++++++++++++++++++-- .../pinot/perf/BenchmarkAndDocIdIterator.java | 6 +- 16 files changed, 309 insertions(+), 58 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/AndDocIdIterator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/AndDocIdIterator.java index 08bc89ad57..d32a29daa0 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/AndDocIdIterator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/AndDocIdIterator.java @@ -32,7 +32,7 @@ public final class AndDocIdIterator implements BlockDocIdIterator { private int _nextDocId = 0; - public AndDocIdIterator(BlockDocIdIterator[] docIdIterators) { + public AndDocIdIterator(BlockDocIdIterator... docIdIterators) { _docIdIterators = docIdIterators; } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/BitmapBasedDocIdIterator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/BitmapBasedDocIdIterator.java index 222642f876..f3d62ee6b0 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/BitmapBasedDocIdIterator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/BitmapBasedDocIdIterator.java @@ -31,4 +31,11 @@ public interface BitmapBasedDocIdIterator extends BlockDocIdIterator { * Returns a bitmap of the matching document ids. */ ImmutableRoaringBitmap getDocIds(); + + /** + * Returns true if the doc ids represent absence rather than presence and need to be handled specially + * */ + default boolean isInverted() { + return false; + } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/InvertedBitmapDocIdIterator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/InvertedBitmapDocIdIterator.java new file mode 100644 index 0000000000..d932f22815 --- /dev/null +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/InvertedBitmapDocIdIterator.java @@ -0,0 +1,81 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.dociditerators; + +import org.apache.pinot.segment.spi.Constants; +import org.roaringbitmap.PeekableIntIterator; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; + + +/** + * The iterator performs a linear pass through the underlying child iterator and returns + * the complement of the result set. + */ +public class InvertedBitmapDocIdIterator implements BitmapBasedDocIdIterator { + private final ImmutableRoaringBitmap _docIds; + private final PeekableIntIterator _docIdIterator; + private final int _lastDoc; + private int _nextDocId; + private int _nextNonMatchingDocId; + + public InvertedBitmapDocIdIterator(ImmutableRoaringBitmap docIds, int numDocs) { + _docIds = docIds; + _docIdIterator = docIds.getIntIterator(); + _nextDocId = 0; + + int currentDocIdFromChildIterator = _docIdIterator.next(); + _nextNonMatchingDocId = currentDocIdFromChildIterator == Constants.EOF ? numDocs : currentDocIdFromChildIterator; + _lastDoc = numDocs - 1; + } + + @Override + public int next() { + while (_nextDocId == _nextNonMatchingDocId && _docIdIterator.hasNext()) { + _nextDocId++; + int nextNonMatchingDocId = _docIdIterator.next(); + _nextNonMatchingDocId = nextNonMatchingDocId == Constants.EOF ? _lastDoc : nextNonMatchingDocId; + } + if (_nextDocId >= _lastDoc) { + return Constants.EOF; + } + return _nextDocId++; + } + + @Override + public int advance(int targetDocId) { + _nextDocId = targetDocId; + if (targetDocId > _nextNonMatchingDocId) { + _docIdIterator.advanceIfNeeded(targetDocId); + _nextNonMatchingDocId = _docIdIterator.hasNext() + ? _docIdIterator.next() + : _lastDoc; + } + return next(); + } + + @Override + public ImmutableRoaringBitmap getDocIds() { + return _docIds; + } + + @Override + public boolean isInverted() { + return true; + } +} diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ScanBasedDocIdIterator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ScanBasedDocIdIterator.java index 1ed890cc83..cf3945ffab 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ScanBasedDocIdIterator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/dociditerators/ScanBasedDocIdIterator.java @@ -39,6 +39,14 @@ public interface ScanBasedDocIdIterator extends BlockDocIdIterator { */ MutableRoaringBitmap applyAnd(ImmutableRoaringBitmap docIds); + /** + * Applies ANDNOT operation to the given bitmap of document ids, returns a bitmap of the matching document ids. + */ + default MutableRoaringBitmap applyAndNot(ImmutableRoaringBitmap docIds, int numDocs) { + // FIXME we're scanning anyway, so flipping a bitmap doesn't seem quite so bad, but this can be improved + return applyAnd(ImmutableRoaringBitmap.flip(docIds, 0L, numDocs)); + } + /** * Returns the number of entries (SV value contains one entry, MV value contains multiple entries) scanned during the * iteration. This method should be called after the iteration is done. diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java index 8b43b7d340..55f97a1847 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java @@ -27,6 +27,7 @@ import org.apache.pinot.common.utils.config.QueryOptionsUtils; import org.apache.pinot.core.common.BlockDocIdIterator; import org.apache.pinot.core.operator.dociditerators.AndDocIdIterator; import org.apache.pinot.core.operator.dociditerators.BitmapBasedDocIdIterator; +import org.apache.pinot.core.operator.dociditerators.InvertedBitmapDocIdIterator; import org.apache.pinot.core.operator.dociditerators.RangelessBitmapDocIdIterator; import org.apache.pinot.core.operator.dociditerators.ScanBasedDocIdIterator; import org.apache.pinot.core.operator.dociditerators.SortedDocIdIterator; @@ -56,11 +57,13 @@ import org.roaringbitmap.buffer.MutableRoaringBitmap; public final class AndDocIdSet implements FilterBlockDocIdSet { private final List<FilterBlockDocIdSet> _docIdSets; private final boolean _cardinalityBasedRankingForScan; + private final int _numDocs; - public AndDocIdSet(List<FilterBlockDocIdSet> docIdSets, Map<String, String> queryOptions) { + public AndDocIdSet(List<FilterBlockDocIdSet> docIdSets, Map<String, String> queryOptions, int numDocs) { _docIdSets = docIdSets; _cardinalityBasedRankingForScan = !MapUtils.isEmpty(queryOptions) && QueryOptionsUtils.isAndScanReorderingEnabled(queryOptions); + _numDocs = numDocs; } @Override @@ -88,8 +91,16 @@ public final class AndDocIdSet implements FilterBlockDocIdSet { } // evaluate the bitmaps in the order of the lowest matching num docIds comes first, so that we minimize the number - // of containers (range) for comparison from the beginning, as will minimize the effort of bitmap AND application - bitmapBasedDocIdIterators.sort(Comparator.comparing(x -> x.getDocIds().getCardinality())); + // of containers (range) for comparison from the beginning, as will minimize the effort of bitmap AND application. + // We also put inverted iterators last to avoid flipping them wherever possible, but if we have to flip one, we + // want it to be the largest one + bitmapBasedDocIdIterators.sort(Comparator.comparing(x -> { + int cardinality = x.getDocIds().getCardinality(); + if (x.isInverted()) { + return Integer.MAX_VALUE - cardinality; + } + return -cardinality; + })); // Evaluate the scan based operator with the highest cardinality coming first, this potentially reduce the range of // scanning from the beginning. Automatically place N/A cardinality column (negative infinity) to the back as we @@ -113,6 +124,7 @@ public final class AndDocIdSet implements FilterBlockDocIdSet { // BlockDocIdIterator, directly return the merged RangelessBitmapDocIdIterator; otherwise, construct and return // an AndDocIdIterator with the merged RangelessBitmapDocIdIterator and the remaining BlockDocIdIterators. + boolean inverted = false; ImmutableRoaringBitmap docIds; if (numSortedDocIdIterators > 0) { List<IntPair> docIdRanges; @@ -132,29 +144,53 @@ public final class AndDocIdSet implements FilterBlockDocIdSet { mutableDocIds.add(docIdRange.getLeft(), docIdRange.getRight() + 1L); } for (BitmapBasedDocIdIterator bitmapBasedDocIdIterator : bitmapBasedDocIdIterators) { - mutableDocIds.and(bitmapBasedDocIdIterator.getDocIds()); + if (bitmapBasedDocIdIterator.isInverted()) { + mutableDocIds.andNot(bitmapBasedDocIdIterator.getDocIds()); + } else { + mutableDocIds.and(bitmapBasedDocIdIterator.getDocIds()); + } } docIds = mutableDocIds; } else { if (numBitmapBasedDocIdIterators == 1) { - docIds = bitmapBasedDocIdIterators.get(0).getDocIds(); + BitmapBasedDocIdIterator bitmapBasedDocIdIterator = bitmapBasedDocIdIterators.get(0); + docIds = bitmapBasedDocIdIterator.getDocIds(); + inverted = bitmapBasedDocIdIterator.isInverted(); } else { - MutableRoaringBitmap mutableDocIds = bitmapBasedDocIdIterators.get(0).getDocIds().toMutableRoaringBitmap(); + BitmapBasedDocIdIterator firstBitmapBasedDocIdIterator = bitmapBasedDocIdIterators.get(0); + MutableRoaringBitmap mutableDocIds = firstBitmapBasedDocIdIterator.getDocIds().toMutableRoaringBitmap(); + if (firstBitmapBasedDocIdIterator.isInverted()) { + // because of the sort order, this means all the filters are inverted, so this may be avoidable, + // but it's also the largest bitmap so we should only create a small bitmap + mutableDocIds.flip(0L, _numDocs); + } for (int i = 1; i < numBitmapBasedDocIdIterators; i++) { - mutableDocIds.and(bitmapBasedDocIdIterators.get(i).getDocIds()); + BitmapBasedDocIdIterator bitmapBasedDocIdIterator = bitmapBasedDocIdIterators.get(i); + if (bitmapBasedDocIdIterator.isInverted()) { + mutableDocIds.andNot(bitmapBasedDocIdIterators.get(i).getDocIds()); + } else { + mutableDocIds.and(bitmapBasedDocIdIterators.get(i).getDocIds()); + } } docIds = mutableDocIds; } } for (ScanBasedDocIdIterator scanBasedDocIdIterator : scanBasedDocIdIterators) { - docIds = scanBasedDocIdIterator.applyAnd(docIds); + if (inverted) { + docIds = scanBasedDocIdIterator.applyAndNot(docIds, _numDocs); + inverted = false; + } else { + docIds = scanBasedDocIdIterator.applyAnd(docIds); + } } - RangelessBitmapDocIdIterator rangelessBitmapDocIdIterator = new RangelessBitmapDocIdIterator(docIds); + BitmapBasedDocIdIterator lastBitmapDocIdIterator = inverted + ? new InvertedBitmapDocIdIterator(docIds, _numDocs) + : new RangelessBitmapDocIdIterator(docIds); if (numRemainingDocIdIterators == 0) { - return rangelessBitmapDocIdIterator; + return lastBitmapDocIdIterator; } else { BlockDocIdIterator[] docIdIterators = new BlockDocIdIterator[numRemainingDocIdIterators + 1]; - docIdIterators[0] = rangelessBitmapDocIdIterator; + docIdIterators[0] = lastBitmapDocIdIterator; for (int i = 0; i < numRemainingDocIdIterators; i++) { docIdIterators[i + 1] = remainingDocIdIterators.get(i); } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/BitmapDocIdSet.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/BitmapDocIdSet.java index a69ac0066a..8ebf193c48 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/BitmapDocIdSet.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/BitmapDocIdSet.java @@ -18,15 +18,23 @@ */ package org.apache.pinot.core.operator.docidsets; +import org.apache.pinot.core.operator.dociditerators.BitmapBasedDocIdIterator; import org.apache.pinot.core.operator.dociditerators.BitmapDocIdIterator; +import org.apache.pinot.core.operator.dociditerators.InvertedBitmapDocIdIterator; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; public class BitmapDocIdSet implements FilterBlockDocIdSet { - private final BitmapDocIdIterator _iterator; + private final BitmapBasedDocIdIterator _iterator; public BitmapDocIdSet(ImmutableRoaringBitmap docIds, int numDocs) { - _iterator = new BitmapDocIdIterator(docIds, numDocs); + this(docIds, numDocs, false); + } + + public BitmapDocIdSet(ImmutableRoaringBitmap docIds, int numDocs, boolean inverted) { + _iterator = inverted + ? new InvertedBitmapDocIdIterator(docIds, numDocs) + : new BitmapDocIdIterator(docIds, numDocs); } public BitmapDocIdSet(BitmapDocIdIterator iterator) { @@ -34,7 +42,7 @@ public class BitmapDocIdSet implements FilterBlockDocIdSet { } @Override - public BitmapDocIdIterator iterator() { + public BitmapBasedDocIdIterator iterator() { return _iterator; } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/NotDocIdSet.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/NotDocIdSet.java index 1fe6462b68..0c5b2a1085 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/NotDocIdSet.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/NotDocIdSet.java @@ -33,6 +33,7 @@ public class NotDocIdSet implements FilterBlockDocIdSet { @Override public BlockDocIdIterator iterator() { + // FIXME if the child set is an inverted bitmap, we could just invert it again return new NotDocIdIterator(_childDocIdSet.iterator(), _numDocs); } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/OrDocIdSet.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/OrDocIdSet.java index 6ead802acf..fa60f9a07b 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/OrDocIdSet.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/OrDocIdSet.java @@ -88,7 +88,11 @@ public final class OrDocIdSet implements FilterBlockDocIdSet { } } for (BitmapBasedDocIdIterator bitmapBasedDocIdIterator : bitmapBasedDocIdIterators) { - docIds.or(bitmapBasedDocIdIterator.getDocIds()); + if (bitmapBasedDocIdIterator.isInverted()) { + docIds.orNot(bitmapBasedDocIdIterator.getDocIds(), _numDocs); + } else { + docIds.or(bitmapBasedDocIdIterator.getDocIds()); + } } BitmapDocIdIterator bitmapDocIdIterator = new BitmapDocIdIterator(docIds, _numDocs); int numRemainingDocIdIterators = remainingDocIdIterators.size(); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/AndFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/AndFilterOperator.java index de2062fd6c..b43ce62c24 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/AndFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/AndFilterOperator.java @@ -36,13 +36,16 @@ public class AndFilterOperator extends BaseFilterOperator { private final List<BaseFilterOperator> _filterOperators; private final Map<String, String> _queryOptions; - public AndFilterOperator(List<BaseFilterOperator> filterOperators, Map<String, String> queryOptions) { + private final int _numDocs; + + public AndFilterOperator(List<BaseFilterOperator> filterOperators, Map<String, String> queryOptions, int numDocs) { _filterOperators = filterOperators; _queryOptions = queryOptions; + _numDocs = numDocs; } - public AndFilterOperator(List<BaseFilterOperator> filterOperators) { - this(filterOperators, null); + public AndFilterOperator(List<BaseFilterOperator> filterOperators, int numDocs) { + this(filterOperators, null, numDocs); } @Override @@ -52,7 +55,7 @@ public class AndFilterOperator extends BaseFilterOperator { for (BaseFilterOperator filterOperator : _filterOperators) { filterBlockDocIdSets.add(filterOperator.nextBlock().getBlockDocIdSet()); } - return new FilterBlock(new AndDocIdSet(filterBlockDocIdSets, _queryOptions)); + return new FilterBlock(new AndDocIdSet(filterBlockDocIdSets, _queryOptions, _numDocs)); } @Override diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java index bd8fb41ce1..5abcafec12 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java @@ -65,11 +65,7 @@ public class BitmapBasedFilterOperator extends BaseFilterOperator { @Override protected FilterBlock getNextBlock() { if (_docIds != null) { - if (_exclusive) { - return new FilterBlock(new BitmapDocIdSet(ImmutableRoaringBitmap.flip(_docIds, 0L, _numDocs), _numDocs)); - } else { - return new FilterBlock(new BitmapDocIdSet(_docIds, _numDocs)); - } + return new FilterBlock(new BitmapDocIdSet(_docIds, _numDocs, _exclusive)); } int[] dictIds = _exclusive ? _predicateEvaluator.getNonMatchingDictIds() : _predicateEvaluator.getMatchingDictIds(); @@ -79,33 +75,20 @@ public class BitmapBasedFilterOperator extends BaseFilterOperator { } if (numDictIds == 1) { ImmutableRoaringBitmap docIds = _invertedIndexReader.getDocIds(dictIds[0]); - if (_exclusive) { - if (docIds instanceof MutableRoaringBitmap) { - MutableRoaringBitmap mutableRoaringBitmap = (MutableRoaringBitmap) docIds; - mutableRoaringBitmap.flip(0L, _numDocs); - return new FilterBlock(new BitmapDocIdSet(mutableRoaringBitmap, _numDocs)); - } else { - return new FilterBlock(new BitmapDocIdSet(ImmutableRoaringBitmap.flip(docIds, 0L, _numDocs), _numDocs)); - } - } else { - return new FilterBlock(new BitmapDocIdSet(docIds, _numDocs)); - } + return new FilterBlock(new BitmapDocIdSet(docIds, _numDocs, _exclusive)); } else { ImmutableRoaringBitmap[] bitmaps = new ImmutableRoaringBitmap[numDictIds]; for (int i = 0; i < numDictIds; i++) { bitmaps[i] = _invertedIndexReader.getDocIds(dictIds[i]); } MutableRoaringBitmap docIds = ImmutableRoaringBitmap.or(bitmaps); - if (_exclusive) { - docIds.flip(0L, _numDocs); - } InvocationRecording recording = Tracing.activeRecording(); if (recording.isEnabled()) { recording.setColumnName(_predicateEvaluator.getPredicate().getLhs().getIdentifier()); recording.setNumDocsMatchingAfterFilter(docIds.getCardinality()); recording.setFilter(FilterType.INDEX, String.valueOf(_predicateEvaluator.getPredicateType())); } - return new FilterBlock(new BitmapDocIdSet(docIds, _numDocs)); + return new FilterBlock(new BitmapDocIdSet(docIds, _numDocs, _exclusive)); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/CombinedFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/CombinedFilterOperator.java index 89b856d9b6..ccd10fd1e4 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/CombinedFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/CombinedFilterOperator.java @@ -38,12 +38,14 @@ public class CombinedFilterOperator extends BaseFilterOperator { private final BaseFilterOperator _mainFilterOperator; private final BaseFilterOperator _subFilterOperator; private final Map<String, String> _queryOptions; + private final int _numDocs; public CombinedFilterOperator(BaseFilterOperator mainFilterOperator, BaseFilterOperator subFilterOperator, - Map<String, String> queryOptions) { + Map<String, String> queryOptions, int numDocs) { _mainFilterOperator = mainFilterOperator; _subFilterOperator = subFilterOperator; _queryOptions = queryOptions; + _numDocs = numDocs; } @Override @@ -61,6 +63,7 @@ public class CombinedFilterOperator extends BaseFilterOperator { Tracing.activeRecording().setNumChildren(2); FilterBlockDocIdSet mainFilterDocIdSet = _mainFilterOperator.nextBlock().getNonScanFilterBLockDocIdSet(); FilterBlockDocIdSet subFilterDocIdSet = _subFilterOperator.nextBlock().getBlockDocIdSet(); - return new FilterBlock(new AndDocIdSet(Arrays.asList(mainFilterDocIdSet, subFilterDocIdSet), _queryOptions)); + return new FilterBlock(new AndDocIdSet(Arrays.asList(mainFilterDocIdSet, subFilterDocIdSet), _queryOptions, + _numDocs)); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java index c343dcea88..9328bf732c 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/FilterOperatorUtils.java @@ -107,7 +107,7 @@ public class FilterOperatorUtils { } else { // Return the AND filter operator with re-ordered child filter operators FilterOperatorUtils.reorderAndFilterChildOperators(queryContext, childFilterOperators); - return new AndFilterOperator(childFilterOperators, queryContext.getQueryOptions()); + return new AndFilterOperator(childFilterOperators, queryContext.getQueryOptions(), numDocs); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java b/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java index 58d74fb00f..fb3c7acde8 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java @@ -116,7 +116,8 @@ public class AggregationPlanNode implements PlanNode { } Pair<FilterPlanNode, BaseFilterOperator> pair = buildFilterOperator(currentFilterExpression); BaseFilterOperator wrappedFilterOperator = - new CombinedFilterOperator(mainPredicateFilterOperator, pair.getRight(), _queryContext.getQueryOptions()); + new CombinedFilterOperator(mainPredicateFilterOperator, pair.getRight(), _queryContext.getQueryOptions(), + numTotalDocs); TransformOperator newTransformOperator = buildTransformOperatorForFilteredAggregates(wrappedFilterOperator); // For each transform operator, associate it with the underlying expression. This allows // fetching the relevant TransformOperator when resolving blocks during aggregation diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/AndDocIdIteratorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/AndDocIdIteratorTest.java index c556114879..eb80820393 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/AndDocIdIteratorTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/dociditerators/AndDocIdIteratorTest.java @@ -18,8 +18,8 @@ */ package org.apache.pinot.core.operator.dociditerators; -import org.apache.pinot.core.common.BlockDocIdIterator; import org.apache.pinot.segment.spi.Constants; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; import org.roaringbitmap.buffer.MutableRoaringBitmap; import org.testng.annotations.Test; @@ -41,10 +41,9 @@ public class AndDocIdIteratorTest { bitmap2.add(docIds2); MutableRoaringBitmap bitmap3 = new MutableRoaringBitmap(); bitmap3.add(docIds3); - AndDocIdIterator andDocIdIterator = new AndDocIdIterator(new BlockDocIdIterator[]{ - new RangelessBitmapDocIdIterator(bitmap1), new RangelessBitmapDocIdIterator(bitmap2), - new RangelessBitmapDocIdIterator(bitmap3) - }); + AndDocIdIterator andDocIdIterator = + new AndDocIdIterator(new RangelessBitmapDocIdIterator(bitmap1), new RangelessBitmapDocIdIterator(bitmap2), + new RangelessBitmapDocIdIterator(bitmap3)); assertEquals(andDocIdIterator.next(), 2); assertEquals(andDocIdIterator.next(), 7); @@ -53,4 +52,49 @@ public class AndDocIdIteratorTest { assertEquals(andDocIdIterator.next(), 20); assertEquals(andDocIdIterator.next(), Constants.EOF); } + + @Test + public void testAndDocIdIteratorWithInvertedChildren() { + int numDocs = 20; + // not = [4, 6, 8, 11, 14, 17, 19] + int[] docIds1 = new int[]{0, 1, 2, 3, 5, 7, 10, 12, 13, 15, 16, 18, 20}; + // not = [0, 3, 8, 10, 14, 17, 18] + int[] docIds2 = new int[]{1, 2, 4, 5, 6, 7, 9, 11, 12, 13, 15, 16, 19, 20}; + // not = [1, 5, 6, 9, 12, 14, 17, 18] + int[] docIds3 = new int[]{0, 2, 3, 4, 7, 8, 10, 11, 13, 15, 16, 19, 20}; + int[] expected = new int[]{14, 17}; + + ImmutableRoaringBitmap bitmap1 = ImmutableRoaringBitmap.bitmapOf(docIds1); + ImmutableRoaringBitmap bitmap2 = ImmutableRoaringBitmap.bitmapOf(docIds2); + ImmutableRoaringBitmap bitmap3 = ImmutableRoaringBitmap.bitmapOf(docIds3); + AndDocIdIterator andDocIdIterator = new AndDocIdIterator(new InvertedBitmapDocIdIterator(bitmap1, numDocs), + new InvertedBitmapDocIdIterator(bitmap2, numDocs), new InvertedBitmapDocIdIterator(bitmap3, numDocs)); + + for (int docId : expected) { + assertEquals(andDocIdIterator.next(), docId); + } + assertEquals(andDocIdIterator.next(), Constants.EOF); + } + + @Test + public void testAndDocIdIteratorWithMixedChildren() { + int numDocs = 20; + int[] docIds1 = new int[]{0, 1, 2, 3, 5, 7, 10, 12, 13, 14, 15, 16, 17, 18, 20}; + // not = [0, 3, 8, 10, 14, 17, 18] + int[] docIds2 = new int[]{1, 2, 4, 5, 6, 7, 9, 11, 12, 13, 15, 16, 19, 20}; + // not = [1, 5, 6, 9, 12, 14, 17, 18] + int[] docIds3 = new int[]{0, 2, 3, 4, 7, 8, 10, 11, 13, 15, 16, 19, 20}; + int[] expected = new int[]{14, 17, 18}; + + ImmutableRoaringBitmap bitmap1 = ImmutableRoaringBitmap.bitmapOf(docIds1); + ImmutableRoaringBitmap bitmap2 = ImmutableRoaringBitmap.bitmapOf(docIds2); + ImmutableRoaringBitmap bitmap3 = ImmutableRoaringBitmap.bitmapOf(docIds3); + AndDocIdIterator andDocIdIterator = new AndDocIdIterator(new BitmapDocIdIterator(bitmap1, numDocs), + new InvertedBitmapDocIdIterator(bitmap2, numDocs), new InvertedBitmapDocIdIterator(bitmap3, numDocs)); + + for (int docId : expected) { + assertEquals(andDocIdIterator.next(), docId); + } + assertEquals(andDocIdIterator.next(), Constants.EOF); + } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/AndFilterOperatorTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/AndFilterOperatorTest.java index 6481ae2256..deaf3a0203 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/AndFilterOperatorTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/AndFilterOperatorTest.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.List; import org.apache.pinot.core.common.BlockDocIdIterator; import org.apache.pinot.segment.spi.Constants; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; import org.roaringbitmap.buffer.MutableRoaringBitmap; import org.testng.Assert; import org.testng.annotations.Test; @@ -37,7 +38,7 @@ public class AndFilterOperatorTest { List<BaseFilterOperator> operators = new ArrayList<>(); operators.add(new TestFilterOperator(docIds1)); operators.add(new TestFilterOperator(docIds2)); - AndFilterOperator andOperator = new AndFilterOperator(operators); + AndFilterOperator andOperator = new AndFilterOperator(operators, 40); BlockDocIdIterator iterator = andOperator.nextBlock().getBlockDocIdSet().iterator(); Assert.assertEquals(iterator.next(), 3); @@ -55,7 +56,7 @@ public class AndFilterOperatorTest { operators.add(new TestFilterOperator(docIds1)); operators.add(new TestFilterOperator(docIds2)); operators.add(new TestFilterOperator(docIds3)); - AndFilterOperator andOperator = new AndFilterOperator(operators); + AndFilterOperator andOperator = new AndFilterOperator(operators, 40); BlockDocIdIterator iterator = andOperator.nextBlock().getBlockDocIdSet().iterator(); Assert.assertEquals(iterator.next(), 3); @@ -72,12 +73,12 @@ public class AndFilterOperatorTest { List<BaseFilterOperator> childOperators = new ArrayList<>(); childOperators.add(new TestFilterOperator(docIds1)); childOperators.add(new TestFilterOperator(docIds2)); - AndFilterOperator childAndOperator = new AndFilterOperator(childOperators); + AndFilterOperator childAndOperator = new AndFilterOperator(childOperators, 40); List<BaseFilterOperator> operators = new ArrayList<>(); operators.add(childAndOperator); operators.add(new TestFilterOperator(docIds3)); - AndFilterOperator andOperator = new AndFilterOperator(operators); + AndFilterOperator andOperator = new AndFilterOperator(operators, 40); BlockDocIdIterator iterator = andOperator.nextBlock().getBlockDocIdSet().iterator(); Assert.assertEquals(iterator.next(), 3); @@ -112,8 +113,8 @@ public class AndFilterOperatorTest { numDocs)); } - AndFilterOperator andFilterOperator1 = new AndFilterOperator(childOperators1); - AndFilterOperator andFilterOperator2 = new AndFilterOperator(childOperators2); + AndFilterOperator andFilterOperator1 = new AndFilterOperator(childOperators1, numDocs); + AndFilterOperator andFilterOperator2 = new AndFilterOperator(childOperators2, numDocs); BlockDocIdIterator iterator1 = andFilterOperator1.getNextBlock().getBlockDocIdSet().iterator(); BlockDocIdIterator iterator2 = andFilterOperator2.getNextBlock().getBlockDocIdSet().iterator(); Assert.assertEquals(iterator1.next(), 0); @@ -145,7 +146,7 @@ public class AndFilterOperatorTest { List<BaseFilterOperator> operators = new ArrayList<>(); operators.add(childOrOperator); operators.add(new TestFilterOperator(docIds1)); - AndFilterOperator andOperator = new AndFilterOperator(operators); + AndFilterOperator andOperator = new AndFilterOperator(operators, 40); BlockDocIdIterator iterator = andOperator.nextBlock().getBlockDocIdSet().iterator(); Assert.assertEquals(iterator.next(), 2); @@ -154,4 +155,73 @@ public class AndFilterOperatorTest { Assert.assertEquals(iterator.next(), 28); Assert.assertEquals(iterator.next(), Constants.EOF); } + + + @Test + public void testComplexOrWithNot() { + int[] include1 = new int[]{2, 3, 6, 10, 15, 16, 28}; + int[] include2 = new int[]{3, 6, 8, 20, 28}; + int[] exclude1 = new int[]{1, 2, 3, 6, 30}; // negation = {4, 5, 7, ... 29} + int[] expected = new int[]{3, 6, 10, 15, 16, 28}; + + List<BaseFilterOperator> childOperators = new ArrayList<>(); + childOperators.add(new BitmapBasedFilterOperator(ImmutableRoaringBitmap.bitmapOf(exclude1), true, 40)); + childOperators.add(new BitmapBasedFilterOperator(ImmutableRoaringBitmap.bitmapOf(include2), false, 40)); + OrFilterOperator childOrOperator = new OrFilterOperator(childOperators, 40); + + List<BaseFilterOperator> operators = new ArrayList<>(); + operators.add(childOrOperator); + operators.add(new TestFilterOperator(include1)); + AndFilterOperator andOperator = new AndFilterOperator(operators, 40); + + BlockDocIdIterator iterator = andOperator.nextBlock().getBlockDocIdSet().iterator(); + for (int j : expected) { + Assert.assertEquals(iterator.next(), j); + } + Assert.assertEquals(iterator.next(), Constants.EOF); + } + + @Test + public void testComplexAndWithNot() { + int[] include1 = new int[]{2, 3, 6, 10, 15, 16, 28}; + int[] include2 = new int[]{3, 6, 8, 20, 28}; + int[] exclude1 = new int[]{1, 2, 3, 6, 30}; // negation = {4, 5, 7, ... 29} + int[] expected = new int[]{28}; + + List<BaseFilterOperator> operators = new ArrayList<>(); + operators.add(new BitmapBasedFilterOperator(ImmutableRoaringBitmap.bitmapOf(exclude1), true, 40)); + operators.add(new BitmapBasedFilterOperator(ImmutableRoaringBitmap.bitmapOf(include2), false, 40)); + operators.add(new TestFilterOperator(include1)); + AndFilterOperator andOperator = new AndFilterOperator(operators, 40); + + BlockDocIdIterator iterator = andOperator.nextBlock().getBlockDocIdSet().iterator(); + for (int j : expected) { + Assert.assertEquals(iterator.next(), j); + } + Assert.assertEquals(iterator.next(), Constants.EOF); + } + + @Test + public void testAndWithNot() { + int[] include1 = new int[]{2, 3, 6, 10, 15, 16, 28}; + int[] include2 = new int[]{3, 6, 8, 20, 28}; + int[] exclude1 = new int[]{1, 2, 3, 6, 30}; // negation = {4, 5, 7, ... 29} + int[] expected = new int[]{28}; + + List<BaseFilterOperator> childOperators = new ArrayList<>(); + childOperators.add(new BitmapBasedFilterOperator(ImmutableRoaringBitmap.bitmapOf(exclude1), true, 40)); + childOperators.add(new BitmapBasedFilterOperator(ImmutableRoaringBitmap.bitmapOf(include2), false, 40)); + AndFilterOperator childAndOperator = new AndFilterOperator(childOperators, 40); + + List<BaseFilterOperator> operators = new ArrayList<>(); + operators.add(childAndOperator); + operators.add(new TestFilterOperator(include1)); + AndFilterOperator andOperator = new AndFilterOperator(operators, 40); + + BlockDocIdIterator iterator = andOperator.nextBlock().getBlockDocIdSet().iterator(); + for (int j : expected) { + Assert.assertEquals(iterator.next(), j); + } + Assert.assertEquals(iterator.next(), Constants.EOF); + } } diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkAndDocIdIterator.java b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkAndDocIdIterator.java index 6982dc597b..7979f42e35 100644 --- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkAndDocIdIterator.java +++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkAndDocIdIterator.java @@ -61,7 +61,8 @@ public class BenchmarkAndDocIdIterator { @OutputTimeUnit(TimeUnit.MILLISECONDS) public void benchAndFilterOperator(MyState myState, Blackhole bh) { for (int i = 0; i < 100; i++) { - bh.consume(new AndFilterOperator(myState._childOperators).nextBlock().getBlockDocIdSet().iterator()); + bh.consume(new AndFilterOperator(myState._childOperators, NUM_DOCS).nextBlock() + .getBlockDocIdSet().iterator()); } } @@ -70,7 +71,8 @@ public class BenchmarkAndDocIdIterator { @OutputTimeUnit(TimeUnit.MILLISECONDS) public void benchAndFilterOperatorDegenerate(MyState myState, Blackhole bh) { for (int i = 0; i < 100; i++) { - bh.consume(new AndFilterOperator(myState._childOperatorsNoOrdering).nextBlock().getBlockDocIdSet().iterator()); + bh.consume(new AndFilterOperator(myState._childOperatorsNoOrdering, NUM_DOCS).nextBlock() + .getBlockDocIdSet().iterator()); } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org