pmpailis commented on PR #14922: URL: https://github.com/apache/lucene/pull/14922#issuecomment-3067418296
> So we probably need to fix DENSE#intoBitSetWithinBlock to adjust the range of bits to copy based on the destination bit set, and then ensure that the source bit set has no set bit between the last OR'ed bit and upTo.

So that would translate to properly setting `upTo` as in `BitSetIterator`, and also throwing an exception if (i) we didn't exhaust the destination bit set, and (ii) there are still any bits set in the `(destTo, upTo]` range in the source bitset? Would something like the following (conceptually) capture what we need?

```diff
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java
index 1d64be1a92..78604322ff 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java
@@ -473,7 +473,7 @@ public final class IndexedDISI extends AbstractDocIdSetIterator {
 
   @Override
   public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOException {
-    assert doc >= offset;
+    assert doc >= offset : "offset=" + offset + " doc=" + doc;
     while (doc < upTo && method.intoBitSetWithinBlock(this, upTo, bitSet, offset) == false) {
       readBlockHeader();
       boolean found = method.advanceWithinBlock(this, block);
@@ -719,10 +719,10 @@ public final class IndexedDISI extends AbstractDocIdSetIterator {
         if (disi.bitSet == null) {
           disi.bitSet = new FixedBitSet(BLOCK_SIZE);
         }
-
-        int sourceFrom = disi.doc & 0xFFFF;
-        int sourceTo = Math.min(upTo - disi.block, BLOCK_SIZE);
         int destFrom = disi.doc - offset;
+        int destTo = Math.min(upTo, offset + bitSet.length());
+        int sourceFrom = disi.doc & 0xFFFF;
+        int sourceTo = Math.min(destTo - disi.block, BLOCK_SIZE);
 
         long fp = disi.slice.getFilePointer();
         disi.slice.seek(fp - Long.BYTES); // seek back a long to include current word (disi.word).
@@ -730,14 +730,32 @@ public final class IndexedDISI extends AbstractDocIdSetIterator {
         disi.slice.readLongs(disi.bitSet.getBits(), disi.wordIndex, numWords);
         FixedBitSet.orRange(disi.bitSet, sourceFrom, bitSet, destFrom, sourceTo - sourceFrom);
 
+        int nextSourceBit = sourceTo + 1;
+        int sourceUpTo = Math.min(disi.bitSet.length(), upTo);
+        if(destTo < upTo && nextSourceBit < sourceUpTo && disi.bitSet.nextSetBit(nextSourceBit, sourceUpTo) != NO_MORE_DOCS) {
+          throw new IllegalStateException("There are bits set in the source bitset that are not accounted for."
+                  + " sourceFrom="
+                  + sourceFrom
+                  + ", sourceTo="
+                  + sourceTo
+                  + ", destFrom="
+                  + destFrom
+                  + ", destTo="
+                  + destTo
+                  + ", offset="
+                  + offset
+                  + ", disi.bitSet.nextSetBit()="
+                  + disi.bitSet.nextSetBit(nextSourceBit, Math.min(disi.bitSet.length(), upTo)));
+        }
+
         int blockEnd = disi.block | 0xFFFF;
-        if (upTo > blockEnd) {
+        if (destTo > blockEnd) {
           disi.slice.seek(disi.blockEnd);
           disi.index += disi.bitSet.cardinality(sourceFrom, sourceTo);
           return false;
         } else {
           disi.slice.seek(fp);
-          return advanceWithinBlock(disi, upTo);
+          return advanceWithinBlock(disi, destTo);
         }
       }
 
```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org