jtibshirani commented on code in PR #792:
URL: https://github.com/apache/lucene/pull/792#discussion_r865291279


##########
lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java:
##########
@@ -388,115 +400,239 @@ private static class FieldEntry {
     int size() {
       return size;
     }
-
-    int ordToDoc(int ord) {
-      return ordToDocOperator.applyAsInt(ord);
-    }
   }
 
-  /** Read the vector values from the index input. This supports both iterated 
and random access. */
-  static class OffHeapVectorValues extends VectorValues
-      implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
-
-    private final int dimension;
-    private final int size;
-    private final int[] ordToDoc;
-    private final IntUnaryOperator ordToDocOperator;
-    private final IndexInput dataIn;
-    private final BytesRef binaryValue;
-    private final ByteBuffer byteBuffer;
-    private final int byteSize;
-    private final float[] value;
+  static class DenseOffHeapVectorValues extends OffHeapVectorValues {
 
     private int ord = -1;
     private int doc = -1;
 
-    OffHeapVectorValues(int dimension, int size, int[] ordToDoc, IndexInput 
dataIn) {
-      this.dimension = dimension;
-      this.size = size;
-      this.ordToDoc = ordToDoc;
-      ordToDocOperator = ordToDoc == null ? IntUnaryOperator.identity() : 
(ord) -> ordToDoc[ord];
-      this.dataIn = dataIn;
-      byteSize = Float.BYTES * dimension;
-      byteBuffer = ByteBuffer.allocate(byteSize);
-      value = new float[dimension];
-      binaryValue = new BytesRef(byteBuffer.array(), byteBuffer.arrayOffset(), 
byteSize);
+    public DenseOffHeapVectorValues(int dimension, int size, IndexInput slice) 
{
+      super(dimension, size, slice);
     }
 
     @Override
-    public int dimension() {
-      return dimension;
+    public float[] vectorValue() throws IOException {
+      slice.seek((long) ord * byteSize);
+      slice.readFloats(value, 0, value.length);
+      return value;
     }
 
     @Override
-    public int size() {
-      return size;
+    public BytesRef binaryValue() throws IOException {
+      slice.seek((long) ord * byteSize);
+      slice.readBytes(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize, 
false);
+      return binaryValue;
+    }
+
+    @Override
+    public int docID() {
+      return doc;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      return advance(doc + 1);
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      assert docID() < target;
+      ord = target;
+      if (target >= size) {
+        return doc = NO_MORE_DOCS;
+      }
+      return doc = target;
+    }
+
+    @Override
+    public RandomAccessVectorValues randomAccess() throws IOException {
+      return new DenseOffHeapVectorValues(dimension, size, slice.clone());
+    }
+
+    @Override
+    public int ordToDoc(int ord) {
+      return ord;
+    }
+  }
+
+  static class SparseOffHeapVectorValues extends OffHeapVectorValues {

Review Comment:
   It'd be good to make all these subclasses private.



##########
lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java:
##########
@@ -388,115 +400,239 @@ private static class FieldEntry {
     int size() {
       return size;
     }
-
-    int ordToDoc(int ord) {
-      return ordToDocOperator.applyAsInt(ord);
-    }
   }
 
-  /** Read the vector values from the index input. This supports both iterated 
and random access. */
-  static class OffHeapVectorValues extends VectorValues
-      implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
-
-    private final int dimension;
-    private final int size;
-    private final int[] ordToDoc;
-    private final IntUnaryOperator ordToDocOperator;
-    private final IndexInput dataIn;
-    private final BytesRef binaryValue;
-    private final ByteBuffer byteBuffer;
-    private final int byteSize;
-    private final float[] value;
+  static class DenseOffHeapVectorValues extends OffHeapVectorValues {
 
     private int ord = -1;
     private int doc = -1;
 
-    OffHeapVectorValues(int dimension, int size, int[] ordToDoc, IndexInput 
dataIn) {
-      this.dimension = dimension;
-      this.size = size;
-      this.ordToDoc = ordToDoc;
-      ordToDocOperator = ordToDoc == null ? IntUnaryOperator.identity() : 
(ord) -> ordToDoc[ord];
-      this.dataIn = dataIn;
-      byteSize = Float.BYTES * dimension;
-      byteBuffer = ByteBuffer.allocate(byteSize);
-      value = new float[dimension];
-      binaryValue = new BytesRef(byteBuffer.array(), byteBuffer.arrayOffset(), 
byteSize);
+    public DenseOffHeapVectorValues(int dimension, int size, IndexInput slice) 
{

Review Comment:
   I think we can remove the `ord` variable now, since in the dense case `doc` 
and `ord` are always the same.



##########
lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java:
##########
@@ -258,14 +257,20 @@ public TopDocs search(String field, float[] target, int 
k, Bits acceptDocs, int
   }
 
   private OffHeapVectorValues getOffHeapVectorValues(FieldEntry fieldEntry) 
throws IOException {
+    if (fieldEntry.docsWithFieldOffset == -2) {

Review Comment:
   Some more suggestions to make the code cleaner:
   * We could move this method to the `OffHeapVectorValues` class as a static 
factory method. It would be like `static OffHeapVectorValues load(FieldEntry 
fieldEntry, IndexInput vectorData) { ... }`.
   * We can move `OffHeapVectorValues` to its own class now that it has grown. 
It would still be package-private.
   * We could move the `getAcceptOrds` method to `OffHeapVectorValues`. This 
lets us remove the check `if (vectorValues instanceof DenseOffHeapVectorValues) 
{ ... }`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to