Jackie-Jiang commented on a change in pull request #8411: URL: https://github.com/apache/pinot/pull/8411#discussion_r837903870
########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/filter/AndFilterOperator.java ########## @@ -45,6 +47,28 @@ protected FilterBlock getNextBlock() { return new FilterBlock(new AndDocIdSet(filterBlockDocIdSets)); } + @Override + public boolean canOptimizeCount() { + boolean allChildrenCanProduceBitmaps = true; + for (BaseFilterOperator child : _filterOperators) { + allChildrenCanProduceBitmaps &= child.canProduceBitmaps(); + } + return allChildrenCanProduceBitmaps; + } + + @Override + public int getNumMatchingDocs() { Review comment: (minor) May add an `assert _filterOperators.size() > 1` for safety and readability ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapBasedFilterOperator.java ########## @@ -100,6 +101,67 @@ protected FilterBlock getNextBlock() { } } + @Override + public boolean canOptimizeCount() { + return true; + } + + @Override + public int getNumMatchingDocs() { + int count = 0; + if (_docIds == null) { + int[] dictIds = _exclusive + ? _predicateEvaluator.getNonMatchingDictIds() + : _predicateEvaluator.getMatchingDictIds(); + switch (dictIds.length) { + case 0: + break; + case 1: { + count = _invertedIndexReader.getDocIds(dictIds[0]).getCardinality(); + break; + } + case 2: { + count = ImmutableRoaringBitmap.orCardinality(_invertedIndexReader.getDocIds(dictIds[0]), + _invertedIndexReader.getDocIds(dictIds[1])); + break; + } + default: { + // this could be optimised if the bitmaps are known to be disjoint (as in a single value bitmap index) + MutableRoaringBitmap bitmap = new MutableRoaringBitmap(); + for (int dictId : dictIds) { + bitmap.or(_invertedIndexReader.getDocIds(dictId)); + } + count = bitmap.getCardinality(); + break; + } + } + } else { + count = _docIds.getCardinality(); + } + return _exclusive ? 
_numDocs - count : count; + } + + @Override + public boolean canProduceBitmaps() { + return true; + } + + @Override + public BitmapCollection getBitmaps() { + if (_docIds == null) { + int[] dictIds = _exclusive + ? _predicateEvaluator.getNonMatchingDictIds() + : _predicateEvaluator.getMatchingDictIds(); + ImmutableRoaringBitmap[] bitmaps = new ImmutableRoaringBitmap[dictIds.length]; + for (int i = 0; i < dictIds.length; i++) { + bitmaps[i] = (_invertedIndexReader.getDocIds(dictIds[i])); Review comment: (minor) ```suggestion bitmaps[i] = _invertedIndexReader.getDocIds(dictIds[i]); ``` ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapCollection.java ########## @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.filter; + +import org.roaringbitmap.buffer.BufferFastAggregation; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + + +public class BitmapCollection { + private final int _numDocs; + private boolean _inverted; + private final ImmutableRoaringBitmap[] _bitmaps; + + public BitmapCollection(int numDocs, boolean inverted, ImmutableRoaringBitmap... 
bitmaps) { + _numDocs = numDocs; + _inverted = inverted; + _bitmaps = bitmaps; + } + + public boolean isInverted() { + return _inverted; + } + + public ImmutableRoaringBitmap[] getBitmaps() { + return _bitmaps; + } + + public BitmapCollection invert() { + _inverted = !_inverted; + return this; + } + + public int andCardinality(BitmapCollection other) { + if (!_inverted) { + if (!other._inverted) { + return ImmutableRoaringBitmap.andCardinality(reduceInternal(), other.reduceInternal()); + } + return ImmutableRoaringBitmap.andNotCardinality(reduceInternal(), other.reduceInternal()); + } else { + if (!other._inverted) { + return ImmutableRoaringBitmap.andNotCardinality(other.reduceInternal(), reduceInternal()); + } + return _numDocs - ImmutableRoaringBitmap.orCardinality(reduceInternal(), other.reduceInternal()); + } + } + + public int orCardinality(BitmapCollection other) { + if (!_inverted) { + if (!other._inverted) { + return ImmutableRoaringBitmap.orCardinality(reduceInternal(), other.reduceInternal()); + } + ImmutableRoaringBitmap reduced = other.reduceInternal(); + return _numDocs - reduced.getCardinality() - ImmutableRoaringBitmap.andCardinality(reduceInternal(), reduced); + } else { + if (!other._inverted) { + ImmutableRoaringBitmap reduced = reduceInternal(); + return _numDocs - reduced.getCardinality() + + ImmutableRoaringBitmap.andCardinality(other.reduceInternal(), reduced); + } + return _numDocs - ImmutableRoaringBitmap.andCardinality(reduceInternal(), other.reduceInternal()); + } + } + + private ImmutableRoaringBitmap reduceInternal() { + if (_bitmaps.length == 1) { + return _bitmaps[0]; + } + return BufferFastAggregation.or(_bitmaps); + } + + public ImmutableRoaringBitmap reduce() { + if (!_inverted) { + return reduceInternal(); + } + return invertedOr(); + } + + private MutableRoaringBitmap invertedOr() { Review comment: Consider specializing the empty and single-element `_bitmaps` cases ########## File path: 
pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java ########## @@ -132,6 +133,92 @@ protected FilterBlock getNextBlock() { } } + @Override + public boolean canOptimizeCount() { + return true; + } + + @Override + public int getNumMatchingDocs() { + int count = 0; + boolean exclusive = _predicateEvaluator.isExclusive(); + if (_predicateEvaluator instanceof SortedDictionaryBasedRangePredicateEvaluator) { + // For RANGE predicate, use start/end document id to construct a new document id range + SortedDictionaryBasedRangePredicateEvaluator rangePredicateEvaluator = + (SortedDictionaryBasedRangePredicateEvaluator) _predicateEvaluator; + int startDocId = _sortedIndexReader.getDocIds(rangePredicateEvaluator.getStartDictId()).getLeft(); + // NOTE: End dictionary id is exclusive in OfflineDictionaryBasedRangePredicateEvaluator. + int endDocId = _sortedIndexReader.getDocIds(rangePredicateEvaluator.getEndDictId() - 1).getRight(); + count = endDocId - startDocId + 1; + } else { + int[] dictIds = + exclusive ? _predicateEvaluator.getNonMatchingDictIds() : _predicateEvaluator.getMatchingDictIds(); + int numDictIds = dictIds.length; + // NOTE: PredicateEvaluator without matching/non-matching dictionary ids should not reach here. + Preconditions.checkState(numDictIds > 0); + if (numDictIds == 1) { + IntPair docIdRange = _sortedIndexReader.getDocIds(dictIds[0]); + count = docIdRange.getRight() - docIdRange.getLeft() + 1; + } else { + IntPair lastDocIdRange = _sortedIndexReader.getDocIds(dictIds[0]); + for (int i = 1; i < numDictIds; i++) { + IntPair docIdRange = _sortedIndexReader.getDocIds(dictIds[i]); + if (docIdRange.getLeft() == lastDocIdRange.getRight() + 1) { + lastDocIdRange.setRight(docIdRange.getRight()); + } else { + count += lastDocIdRange.getRight() - lastDocIdRange.getLeft() + 1; + lastDocIdRange = docIdRange; + } + } + count += lastDocIdRange.getRight() - lastDocIdRange.getLeft() + 1; + } + } + return exclusive ? 
_numDocs - count : count; + } + + @Override + public boolean canProduceBitmaps() { + return true; + } + + @Override + public BitmapCollection getBitmaps() { + MutableRoaringBitmap bitmap = new MutableRoaringBitmap(); + boolean exclusive = _predicateEvaluator.isExclusive(); + if (_predicateEvaluator instanceof SortedDictionaryBasedRangePredicateEvaluator) { + // For RANGE predicate, use start/end document id to construct a new document id range + SortedDictionaryBasedRangePredicateEvaluator rangePredicateEvaluator = + (SortedDictionaryBasedRangePredicateEvaluator) _predicateEvaluator; + int startDocId = _sortedIndexReader.getDocIds(rangePredicateEvaluator.getStartDictId()).getLeft(); + // NOTE: End dictionary id is exclusive in OfflineDictionaryBasedRangePredicateEvaluator. + int endDocId = _sortedIndexReader.getDocIds(rangePredicateEvaluator.getEndDictId() - 1).getRight(); + bitmap.add(startDocId, endDocId + 1L); + } else { + int[] dictIds = + exclusive ? _predicateEvaluator.getNonMatchingDictIds() : _predicateEvaluator.getMatchingDictIds(); + int numDictIds = dictIds.length; + // NOTE: PredicateEvaluator without matching/non-matching dictionary ids should not reach here. 
+ Preconditions.checkState(numDictIds > 0); Review comment: Same here ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/filter/OrFilterOperator.java ########## @@ -60,4 +62,25 @@ public String toExplainString() { public List<Operator> getChildOperators() { return new ArrayList<>(_filterOperators); } + + @Override + public boolean canOptimizeCount() { + boolean allChildrenProduceBitmaps = true; + for (BaseFilterOperator child : _filterOperators) { + allChildrenProduceBitmaps &= child.canProduceBitmaps(); + } + return allChildrenProduceBitmaps; + } + + @Override + public int getNumMatchingDocs() { Review comment: (minor) Add an `assert _filterOperators.size() > 1` for safety and readability ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/filter/BitmapCollection.java ########## @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.core.operator.filter; + +import org.roaringbitmap.buffer.BufferFastAggregation; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; +import org.roaringbitmap.buffer.MutableRoaringBitmap; + + +public class BitmapCollection { + private final int _numDocs; + private boolean _inverted; + private final ImmutableRoaringBitmap[] _bitmaps; + + public BitmapCollection(int numDocs, boolean inverted, ImmutableRoaringBitmap... bitmaps) { + _numDocs = numDocs; + _inverted = inverted; + _bitmaps = bitmaps; + } + + public boolean isInverted() { + return _inverted; + } + + public ImmutableRoaringBitmap[] getBitmaps() { + return _bitmaps; + } + + public BitmapCollection invert() { + _inverted = !_inverted; + return this; + } + + public int andCardinality(BitmapCollection other) { + if (!_inverted) { + if (!other._inverted) { + return ImmutableRoaringBitmap.andCardinality(reduceInternal(), other.reduceInternal()); + } + return ImmutableRoaringBitmap.andNotCardinality(reduceInternal(), other.reduceInternal()); + } else { + if (!other._inverted) { + return ImmutableRoaringBitmap.andNotCardinality(other.reduceInternal(), reduceInternal()); + } + return _numDocs - ImmutableRoaringBitmap.orCardinality(reduceInternal(), other.reduceInternal()); + } + } + + public int orCardinality(BitmapCollection other) { + if (!_inverted) { + if (!other._inverted) { + return ImmutableRoaringBitmap.orCardinality(reduceInternal(), other.reduceInternal()); + } + ImmutableRoaringBitmap reduced = other.reduceInternal(); + return _numDocs - reduced.getCardinality() - ImmutableRoaringBitmap.andCardinality(reduceInternal(), reduced); + } else { + if (!other._inverted) { + ImmutableRoaringBitmap reduced = reduceInternal(); + return _numDocs - reduced.getCardinality() + + ImmutableRoaringBitmap.andCardinality(other.reduceInternal(), reduced); + } + return _numDocs - ImmutableRoaringBitmap.andCardinality(reduceInternal(), other.reduceInternal()); + } + } + + private 
ImmutableRoaringBitmap reduceInternal() { Review comment: Consider specializing the empty `_bitmaps` case ########## File path: pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java ########## @@ -132,6 +133,92 @@ protected FilterBlock getNextBlock() { } } + @Override + public boolean canOptimizeCount() { + return true; + } + + @Override + public int getNumMatchingDocs() { + int count = 0; + boolean exclusive = _predicateEvaluator.isExclusive(); + if (_predicateEvaluator instanceof SortedDictionaryBasedRangePredicateEvaluator) { + // For RANGE predicate, use start/end document id to construct a new document id range + SortedDictionaryBasedRangePredicateEvaluator rangePredicateEvaluator = + (SortedDictionaryBasedRangePredicateEvaluator) _predicateEvaluator; + int startDocId = _sortedIndexReader.getDocIds(rangePredicateEvaluator.getStartDictId()).getLeft(); + // NOTE: End dictionary id is exclusive in OfflineDictionaryBasedRangePredicateEvaluator. + int endDocId = _sortedIndexReader.getDocIds(rangePredicateEvaluator.getEndDictId() - 1).getRight(); + count = endDocId - startDocId + 1; + } else { + int[] dictIds = + exclusive ? _predicateEvaluator.getNonMatchingDictIds() : _predicateEvaluator.getMatchingDictIds(); + int numDictIds = dictIds.length; + // NOTE: PredicateEvaluator without matching/non-matching dictionary ids should not reach here. + Preconditions.checkState(numDictIds > 0); Review comment: (MAJOR) This is not true. E.g., for `REGEXP_LIKE`, it might not get any matching dictIds, but we won't scan the dictionary beforehand and set `_alwaysFalse` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org