benwtrent commented on code in PR #13872: URL: https://github.com/apache/lucene/pull/13872#discussion_r1792308666
########## lucene/core/src/java/org/apache/lucene/codecs/lucene99/OffHeapQuantizedByteVectorValues.java: ########## @@ -127,31 +121,42 @@ public int size() { } @Override - public byte[] vectorValue(int targetOrd) throws IOException { - if (lastOrd == targetOrd) { - return binaryValue; - } - slice.seek((long) targetOrd * byteSize); - slice.readBytes(byteBuffer.array(), byteBuffer.arrayOffset(), numBytes); - slice.readFloats(scoreCorrectionConstant, 0, 1); - decompressBytes(binaryValue, numBytes); - lastOrd = targetOrd; - return binaryValue; - } + public QuantizedBytes vectors() throws IOException { + return new QuantizedBytes() { + ByteBuffer byteBuffer = ByteBuffer.allocate(dimension); + byte[] binaryValue = byteBuffer.array(); + IndexInput input = slice.clone(); + float[] scoreCorrectionConstant = new float[1]; Review Comment: All of these should be private & final. There are other instances where you do something similar; let's make final everything that can be, and private everything that should be. 
########## lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene94/OffHeapByteVectorValues.java: ########## @@ -68,17 +63,28 @@ public int size() { } @Override - public byte[] vectorValue(int targetOrd) throws IOException { - if (lastOrd != targetOrd) { - readValue(targetOrd); - lastOrd = targetOrd; - } - return binaryValue; - } + public Bytes vectors() throws IOException { + return new Bytes() { + IndexInput input = slice.clone(); + ByteBuffer byteBuffer = ByteBuffer.allocate(byteSize); + ; Review Comment: ```suggestion ``` ########## lucene/core/src/java/org/apache/lucene/index/ExitableDirectoryReader.java: ########## @@ -489,8 +490,15 @@ public int size() { } @Override - public byte[] vectorValue(int ord) throws IOException { - return vectorValues.vectorValue(ord); + public Bytes vectors() throws IOException { + return new Bytes() { + Bytes vectors = vectorValues.vectors(); + + @Override + public byte[] get(int ord) throws IOException { + return vectors.get(ord); + } + }; Review Comment: ```suggestion return vectorValues.vectors(); ``` ########## lucene/core/src/java/org/apache/lucene/codecs/hnsw/DefaultFlatVectorScorer.java: ########## @@ -88,34 +88,28 @@ public String toString() { /** RandomVectorScorerSupplier for bytes vector */ private static final class ByteScoringSupplier implements RandomVectorScorerSupplier { - private final ByteVectorValues vectors; - private final ByteVectorValues vectors1; - private final ByteVectorValues vectors2; + private final ByteVectorValues vectorValues; private final VectorSimilarityFunction similarityFunction; private ByteScoringSupplier( - ByteVectorValues vectors, VectorSimilarityFunction similarityFunction) throws IOException { - this.vectors = vectors; - vectors1 = vectors.copy(); - vectors2 = vectors.copy(); + ByteVectorValues vectorValues, VectorSimilarityFunction similarityFunction) + throws IOException { + this.vectorValues = vectorValues; this.similarityFunction = similarityFunction; } @Override - 
public RandomVectorScorer scorer(int ord) { - return new RandomVectorScorer.AbstractRandomVectorScorer(vectors) { + public RandomVectorScorer scorer(int ord) throws IOException { + ByteVectorValues.Bytes vectors1 = vectorValues.vectors(); + ByteVectorValues.Bytes vectors2 = vectorValues.vectors(); + return new RandomVectorScorer.AbstractRandomVectorScorer(vectorValues) { Review Comment: I would expect this to create way more garbage during HNSW graph building. The `RandomVectorScorerSupplier` is passed around to the diversity checking, which will now allocate new scratch space for each scorer that is created (and there will likely be many of them for every node we add). Before, we had a single set of scratch space created just in the `RandomVectorScorerSupplier`. I worry this will have a measurable performance impact and hurt heap usage. ########## lucene/core/src/java/org/apache/lucene/index/ExitableDirectoryReader.java: ########## @@ -441,8 +441,14 @@ public int dimension() { } @Override - public float[] vectorValue(int ord) throws IOException { - return vectorValues.vectorValue(ord); + public Floats vectors() throws IOException { + Floats vectors = vectorValues.vectors(); + return new Floats() { + @Override + public float[] get(int ord) throws IOException { + return vectors.get(ord); + } + }; Review Comment: ```suggestion public Floats vectors() throws IOException { return vectorValues.vectors(); } ``` ########## lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorerSupplier.java: ########## @@ -112,20 +96,20 @@ static final class CosineSupplier extends Lucene99MemorySegmentByteVectorScorerS @Override public RandomVectorScorer scorer(int ord) { checkOrdinal(ord); + MemorySegmentAccessInput slice = input.clone(); + byte[] scratch1 = new byte[vectorByteSize]; + byte[] scratch2 = new byte[vectorByteSize]; Review Comment: now we allocate scratch even if we don't need it, maybe this isn't that big of a deal? 
Same goes for all the other memsegment scorers, we don't really need the scratch unless a memory segment isn't available. ########## lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java: ########## @@ -421,27 +421,28 @@ public int size() { } @Override - public float[] vectorValue(int ord) throws IOException { - return rawVectorValues.vectorValue(ord); + public Floats vectors() throws IOException { + Floats rawVectors = rawVectorValues.vectors(); + return new Floats() { + @Override + public float[] get(int ord) throws IOException { + return rawVectors.get(ord); + } + }; Review Comment: ```suggestion return rawVectorValues.vectors(); ``` ########## lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java: ########## @@ -319,9 +319,15 @@ private static class SortingFloatVectorValues extends FloatVectorValues { } @Override - public float[] vectorValue(int ord) throws IOException { - // ords are interpreted in the delegate's ord-space. - return delegate.vectorValue(ord); + public Floats vectors() throws IOException { + Floats delegateVectors = delegate.vectors(); + return new Floats() { + @Override + public float[] get(int ord) throws IOException { + // ords are interpreted in the delegate's ord-space. + return delegateVectors.get(ord); + } + }; Review Comment: ```suggestion // ords are interpreted in the delegate's ord-space. 
return delegate.vectors(); ``` ########## lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java: ########## @@ -356,8 +357,15 @@ private static class SortingByteVectorValues extends ByteVectorValues { } @Override - public byte[] vectorValue(int ord) throws IOException { - return delegate.vectorValue(ord); + public Bytes vectors() throws IOException { + return new Bytes() { + Bytes vectors = delegate.vectors(); + + @Override + public byte[] get(int ord) throws IOException { + return vectors.get(ord); + } + }; Review Comment: ```suggestion return delegate.vectors(); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org