Jackie-Jiang commented on code in PR #16789:
URL: https://github.com/apache/pinot/pull/16789#discussion_r2356373565


##########
pinot-core/src/main/java/org/apache/pinot/core/operator/AscDocIdSetOperator.java:
##########
@@ -32,11 +32,12 @@
 
 
 /**
- * The <code>DocIdSetOperator</code> takes a filter operator and returns 
blocks with set of the matched document Ids.
+ * The <code>AscendingDocIdSetOperator</code> takes a filter operator and 
returns blocks with set of the matched
+ * document Ids.
  * <p>Should call {@link #nextBlock()} multiple times until it returns 
<code>null</code> (already exhausts all the
  * matched documents) or already gathered enough documents (for selection 
queries).
  */
-public class DocIdSetOperator extends BaseOperator<DocIdSetBlock> {
+public class AscDocIdSetOperator extends BaseDocIdSetOperator {

Review Comment:
   (minor) Suggest keeping the name for this class as is, and add a 
`ReverseOrderDocIdSetOperator`



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/DescDocIdSetOperator.java:
##########
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator;
+
+import com.google.common.base.Preconditions;
+import java.util.Collections;
+import java.util.List;
+import org.apache.pinot.core.common.BlockDocIdIterator;
+import org.apache.pinot.core.common.BlockDocIdSet;
+import org.apache.pinot.core.common.Operator;
+import org.apache.pinot.core.operator.blocks.DocIdSetBlock;
+import org.apache.pinot.core.operator.dociditerators.BitmapBasedDocIdIterator;
+import org.apache.pinot.core.operator.filter.BaseFilterOperator;
+import org.apache.pinot.core.plan.DocIdSetPlanNode;
+import org.apache.pinot.segment.spi.Constants;
+import org.apache.pinot.spi.trace.Tracing;
+import org.roaringbitmap.IntIterator;
+import org.roaringbitmap.RoaringBitmap;
+import org.roaringbitmap.RoaringBitmapWriter;
+
+
+public class DescDocIdSetOperator extends BaseDocIdSetOperator {
+  private static final String EXPLAIN_NAME = "DOC_ID_SET";
+
+  private static final ThreadLocal<int[]> THREAD_LOCAL_DOC_IDS =
+      ThreadLocal.withInitial(() -> new 
int[DocIdSetPlanNode.MAX_DOC_PER_CALL]);
+
+  private final BaseFilterOperator _filterOperator;
+  private final int _maxSizeOfDocIdSet;
+
+  private BlockDocIdSet _blockDocIdSet;
+  private int _currentDocId = 0;
+  private IntIterator _reverseIterator;
+
+  public DescDocIdSetOperator(BaseFilterOperator filterOperator, int 
maxSizeOfDocIdSet) {
+    Preconditions.checkArgument(maxSizeOfDocIdSet > 0 && maxSizeOfDocIdSet <= 
DocIdSetPlanNode.MAX_DOC_PER_CALL);
+    _filterOperator = filterOperator;
+    _maxSizeOfDocIdSet = maxSizeOfDocIdSet;
+  }
+
+  @Override
+  protected DocIdSetBlock getNextBlock() {
+    if (_reverseIterator == null) {
+      initializeBitmap();
+    }
+
+    if (_currentDocId == Constants.EOF) {
+      return null;
+    }
+
+    Tracing.ThreadAccountantOps.sample();
+
+    int pos = 0;
+    int[] docIds = THREAD_LOCAL_DOC_IDS.get();
+    for (int i = 0; i < _maxSizeOfDocIdSet && _reverseIterator.hasNext(); i++) 
{
+      _currentDocId = _reverseIterator.next();
+      docIds[pos++] = _currentDocId;
+    }
+    if (pos > 0) {
+      return new DocIdSetBlock(docIds, pos);
+    } else {
+      return null;
+    }
+  }
+
+  private void initializeBitmap() {
+    _blockDocIdSet = _filterOperator.nextBlock().getBlockDocIdSet();
+    BlockDocIdIterator iterator = _blockDocIdSet.iterator();
+    if (iterator instanceof BitmapBasedDocIdIterator) {
+      _reverseIterator = ((BitmapBasedDocIdIterator) 
iterator).getDocIds().getReverseIntIterator();
+    } else {
+      RoaringBitmapWriter<RoaringBitmap> writer = 
RoaringBitmapWriter.writer().get();
+      int docId = iterator.next();
+      while (docId != Constants.EOF) {
+        writer.add(docId);
+        docId = iterator.next();
+      }
+      _reverseIterator = writer.get().getReverseIntIterator();
+    }
+  }
+
+  @Override
+  public String toExplainString() {
+    return EXPLAIN_NAME;
+  }
+
+  @Override
+  public List<Operator> getChildOperators() {
+    return Collections.singletonList(_filterOperator);

Review Comment:
   (nit) Use `List.of()`



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/SegmentBlockOperator.java:
##########
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator;
+
+import org.apache.pinot.core.common.Block;
+import org.apache.pinot.core.common.Operator;
+
+
+/// An operator that is bound to a specific segment.

Review Comment:
   There are a lot of operators bound to a segment (e.g. select, aggregate, 
distinct etc.).
   I believe the intention here is to add an interface for operators that can 
iterate in reverse order, so maybe name it `ReversibleDocScanOperator`.
   Given there is only normal order and reverse order, we may also simplify the 
method to be `boolean isReverseOrder()` and add a method 
`ReversibleDocScanOperator toReverseOrder()`. I feel adding a `DidOrder` enum 
is a little bit over-killing



##########
pinot-core/src/main/java/org/apache/pinot/core/plan/SelectionPlanNode.java:
##########
@@ -120,6 +122,34 @@ public Operator<SelectionResultsBlock> run() {
     return new SelectionOrderByOperator(_indexSegment, _queryContext, 
expressions, projectOperator);
   }
 
+  private BaseProjectOperator<?> getSortedByProject(List<ExpressionContext> 
expressions, int maxDocsPerCall,
+      List<OrderByExpressionContext> orderByExpressions) {
+    BaseProjectOperator<?> projectOperator =
+        new ProjectPlanNode(_segmentContext, _queryContext, expressions, 
maxDocsPerCall).run();
+
+    boolean asc = orderByExpressions.get(0).isAsc();
+    if (!asc
+        && reverseOptimizationEnabled(_queryContext)
+        && 
!projectOperator.isCompatibleWith(SegmentBlockOperator.DidOrder.DESC)) {
+      try {
+        return projectOperator.withOrder(SegmentBlockOperator.DidOrder.DESC);
+      } catch (IllegalArgumentException | UnsupportedOperationException e) {
+        // This happens when the operator cannot provide the required order 
between blocks
+        // Fallback to SelectionOrderByOperator
+        return projectOperator;
+      }
+    }
+    return projectOperator;
+  }
+
+  private boolean reverseOptimizationEnabled(QueryContext queryContext) {

Review Comment:
   (minor) Consider moving this to `QueryOptionsUtils` for central management 
and reuse in the future



##########
pinot-core/src/main/java/org/apache/pinot/core/operator/BitmapDocIdSetOperator.java:
##########
@@ -32,51 +33,50 @@
  * <p>Should call {@link #nextBlock()} multiple times until it returns 
<code>null</code> (already exhausts all the
  * documents) or already gathered enough documents (for selection queries).
  */
-public class BitmapDocIdSetOperator extends BaseOperator<DocIdSetBlock> {
+public class BitmapDocIdSetOperator extends BaseDocIdSetOperator {
 
   private static final String EXPLAIN_NAME = "DOC_ID_SET_BITMAP";
-
-  // TODO: Consider using BatchIterator to fill the document ids. Currently 
BatchIterator only reads bits for one
-  //       container instead of trying to fill up the buffer with bits from 
multiple containers. If in the future
-  //       BatchIterator provides an API to fill up the buffer, switch to 
BatchIterator.
-  private final IntIterator _intIterator;
+  @Nullable
+  private IntIteratorDocIdSetOperator _docIdIteratorOperator = null;
   private final int[] _docIdBuffer;
+  private final ImmutableBitmapDataProvider _docIds;
+  private final DidOrder _didOrder;
 
-  public BitmapDocIdSetOperator(ImmutableBitmapDataProvider bitmap) {
-    _intIterator = bitmap.getIntIterator();
-    _docIdBuffer = new int[DocIdSetPlanNode.MAX_DOC_PER_CALL];
+  public BitmapDocIdSetOperator(ImmutableBitmapDataProvider docIds, int[] 
docIdBuffer, DidOrder didOrder) {
+    _docIds = docIds;
+    _docIdBuffer = docIdBuffer;
+    _didOrder = didOrder;
   }
 
-  public BitmapDocIdSetOperator(ImmutableBitmapDataProvider bitmap, int 
numDocs) {
-    _intIterator = bitmap.getIntIterator();
-    _docIdBuffer = new int[Math.min(numDocs, 
DocIdSetPlanNode.MAX_DOC_PER_CALL)];
+  public static BitmapDocIdSetOperator ascending(ImmutableBitmapDataProvider 
docIds) {
+    return ascending(docIds, new int[DocIdSetPlanNode.MAX_DOC_PER_CALL]);
   }
 
-  public BitmapDocIdSetOperator(IntIterator intIterator, int[] docIdBuffer) {
-    _intIterator = intIterator;
-    _docIdBuffer = docIdBuffer;
+  public static BitmapDocIdSetOperator ascending(ImmutableBitmapDataProvider 
docIds, int numDocs) {
+    return ascending(docIds, new int[Math.min(numDocs, 
DocIdSetPlanNode.MAX_DOC_PER_CALL)]);
   }
 
-  public BitmapDocIdSetOperator(ImmutableBitmapDataProvider bitmap, int[] 
docIdBuffer) {
-    _intIterator = bitmap.getIntIterator();
-    _docIdBuffer = docIdBuffer;
+  public static BitmapDocIdSetOperator ascending(ImmutableBitmapDataProvider 
docIds, int[] docIdBuffer) {
+    return new BitmapDocIdSetOperator(docIds, docIdBuffer, DidOrder.ASC);
+  }
+
+  public static BitmapDocIdSetOperator descending(ImmutableBitmapDataProvider 
docIds, int numDocs) {
+    return descending(docIds, new int[Math.min(numDocs, 
DocIdSetPlanNode.MAX_DOC_PER_CALL)]);
+  }
+
+  public static BitmapDocIdSetOperator descending(ImmutableBitmapDataProvider 
bitmap, int[] docIdBuffer) {
+    return new BitmapDocIdSetOperator(bitmap, docIdBuffer, DidOrder.ASC);
   }
 
   @Override
   protected DocIdSetBlock getNextBlock() {
-    int bufferSize = _docIdBuffer.length;
-    int index = 0;
-    while (index < bufferSize && _intIterator.hasNext()) {
-      _docIdBuffer[index++] = _intIterator.next();
-    }
-    if (index > 0) {
-      return new DocIdSetBlock(_docIdBuffer, index);
-    } else {
-      return null;
+    if (_docIdIteratorOperator == null) {
+      IntIterator iterator = _didOrder == DidOrder.ASC ? 
_docIds.getIntIterator() : _docIds.getReverseIntIterator();
+      _docIdIteratorOperator = new IntIteratorDocIdSetOperator(iterator, 
_docIdBuffer, _didOrder);

Review Comment:
   Do we need to introduce this new `IntIteratorDocIdSetOperator`? I feel we 
just need to check if `_intIterator` is initialized, and follow the existing 
logic



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to