pmpailis commented on PR #14922:
URL: https://github.com/apache/lucene/pull/14922#issuecomment-3067418296

   > So we probably need to fix DENSE#intoBitSetWithinBlock to adjust the range 
of bits to copy based on the destination bit set, and then ensure that the 
source bit set has no set bit between the last OR'ed bit and upTo.
   
   So that would translate to properly setting `upTo` as in `BitSetIterator`, 
and also throwing an exception if (i) we didn't exhaust the destination bit set, 
and (ii) there are still bits set in the `(destTo, upTo]` range in the source 
bitset? 
   
   Would something like the following (conceptually) capture what we need? 
   ```diff
   diff --git 
a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java 
b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java
   index 1d64be1a92..78604322ff 100644
   --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java
   +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java
   @@ -473,7 +473,7 @@ public final class IndexedDISI extends 
AbstractDocIdSetIterator {
    
      @Override
      public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws 
IOException {
   -    assert doc >= offset;
   +    assert doc >= offset : "offset=" + offset + " doc=" + doc;
        while (doc < upTo && method.intoBitSetWithinBlock(this, upTo, bitSet, 
offset) == false) {
          readBlockHeader();
          boolean found = method.advanceWithinBlock(this, block);
   @@ -719,10 +719,10 @@ public final class IndexedDISI extends 
AbstractDocIdSetIterator {
            if (disi.bitSet == null) {
              disi.bitSet = new FixedBitSet(BLOCK_SIZE);
            }
   -
   -        int sourceFrom = disi.doc & 0xFFFF;
   -        int sourceTo = Math.min(upTo - disi.block, BLOCK_SIZE);
            int destFrom = disi.doc - offset;
   +        int destTo =  Math.min(upTo, offset + bitSet.length());
   +        int sourceFrom = disi.doc & 0xFFFF;
   +        int sourceTo = Math.min(destTo - disi.block, BLOCK_SIZE);
    
            long fp = disi.slice.getFilePointer();
            disi.slice.seek(fp - Long.BYTES); // seek back a long to include 
current word (disi.word).
   @@ -730,14 +730,32 @@ public final class IndexedDISI extends 
AbstractDocIdSetIterator {
            disi.slice.readLongs(disi.bitSet.getBits(), disi.wordIndex, 
numWords);
            FixedBitSet.orRange(disi.bitSet, sourceFrom, bitSet, destFrom, 
sourceTo - sourceFrom);
    
   +        int nextSourceBit = sourceTo + 1;
   +        int sourceUpTo = Math.min(disi.bitSet.length(), upTo);
   +        if(destTo < upTo && nextSourceBit < sourceUpTo && 
disi.bitSet.nextSetBit(nextSourceBit, sourceUpTo) != NO_MORE_DOCS) {
   +          throw new IllegalStateException("There are bits set in the source 
bitset that are not accounted for."
   +              + " sourceFrom="
   +              + sourceFrom
   +              + ", sourceTo="
   +              + sourceTo
   +              + ", destFrom="
   +              + destFrom
   +              + ", destTo="
   +              + destTo
   +              + ", offset="
   +              + offset
   +              + ", disi.bitSet.nextSetBit()="
   +              + disi.bitSet.nextSetBit(nextSourceBit, 
Math.min(disi.bitSet.length(), upTo)));
   +        }
   +
            int blockEnd = disi.block | 0xFFFF;
   -        if (upTo > blockEnd) {
   +        if (destTo > blockEnd) {
              disi.slice.seek(disi.blockEnd);
              disi.index += disi.bitSet.cardinality(sourceFrom, sourceTo);
              return false;
            } else {
              disi.slice.seek(fp);
   -          return advanceWithinBlock(disi, upTo);
   +          return advanceWithinBlock(disi, destTo);
            }
          }
   ``` 


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to