muse-dev[bot] commented on a change in pull request #2022:
URL: https://github.com/apache/lucene-solr/pull/2022#discussion_r511202225



##########
File path: 
lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
##########
@@ -165,42 +191,88 @@ public VectorValues getVectorValues(String field) throws 
IOException {
     return new OffHeapVectorValues(fieldEntry, bytesSlice);
   }
 
+  // exposed for testing
+  public KnnGraphValues getGraphValues(String field) throws IOException {
+    FieldInfo info = fieldInfos.fieldInfo(field);
+    if (info == null) {
+      throw new IllegalArgumentException("No such field '" + field + "'");
+    }
+    FieldEntry entry = fields.get(field);
+    if (entry != null && entry.indexDataLength > 0) {
+      return getGraphValues(entry);
+    } else {
+      return KnnGraphValues.EMPTY;
+    }
+  }
+
+  private KnnGraphValues getGraphValues(FieldEntry entry) throws IOException {
+    if (isHnswStrategy(entry.searchStrategy)) {
+      HnswGraphFieldEntry graphEntry = (HnswGraphFieldEntry) entry;
+      IndexInput bytesSlice = vectorIndex.slice("graph-data", 
entry.indexDataOffset, entry.indexDataLength);
+      return new IndexedKnnGraphReader(graphEntry, bytesSlice);
+    } else {
+      return KnnGraphValues.EMPTY;
+    }
+  }
+
   @Override
   public void close() throws IOException {
-    vectorData.close();
+    IOUtils.close(vectorData, vectorIndex);
   }
 
   private static class FieldEntry {
 
     final int dimension;
     final VectorValues.SearchStrategy searchStrategy;
-    final int maxDoc;
 
     final long vectorDataOffset;
     final long vectorDataLength;
+    final long indexDataOffset;
+    final long indexDataLength;
     final int[] ordToDoc;
 
-    FieldEntry(int dimension, VectorValues.SearchStrategy searchStrategy, int 
maxDoc,
-               long vectorDataOffset, long vectorDataLength, int[] ordToDoc) {
-      this.dimension = dimension;
+    FieldEntry(DataInput input, VectorValues.SearchStrategy searchStrategy) 
throws IOException {
       this.searchStrategy = searchStrategy;
-      this.maxDoc = maxDoc;
-      this.vectorDataOffset = vectorDataOffset;
-      this.vectorDataLength = vectorDataLength;
-      this.ordToDoc = ordToDoc;
+      vectorDataOffset = input.readVLong();
+      vectorDataLength = input.readVLong();
+      indexDataOffset = input.readVLong();
+      indexDataLength = input.readVLong();
+      dimension = input.readInt();
+      int size = input.readInt();
+      ordToDoc = new int[size];
+      for (int i = 0; i < size; i++) {
+        int doc = input.readVInt();
+        ordToDoc[i] = doc;
+      }
     }
 
     int size() {
       return ordToDoc.length;
     }
   }
 
+  private static class HnswGraphFieldEntry extends FieldEntry {
+
+    final long[] ordOffsets;
+
+    HnswGraphFieldEntry(DataInput input, VectorValues.SearchStrategy 
searchStrategy) throws IOException {
+      super(input, searchStrategy);
+      ordOffsets = new long[size()];
+      long offset = 0;
+      for (int i = 0; i < ordOffsets.length; i++) {
+        offset += input.readVLong();
+        ordOffsets[i] = offset;
+      }
+    }
+  }
+
   /** Read the vector values from the index input. This supports both iterated 
and random access. */
-  private final static class OffHeapVectorValues extends VectorValues {
+  private final class OffHeapVectorValues extends VectorValues {
 
     final FieldEntry fieldEntry;
     final IndexInput dataIn;
 
+    final Random random = new Random();

Review comment:
       *PREDICTABLE_RANDOM:*  This random generator (java.util.Random) is 
predictable 
[(details)](https://find-sec-bugs.github.io/bugs.htm#PREDICTABLE_RANDOM)

##########
File path: 
lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
##########
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util.hnsw;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.lucene.index.KnnGraphValues;
+import org.apache.lucene.index.VectorValues;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Builder for HNSW graph. See {@link HnswGraph} for a gloss on the algorithm 
and the meaning of the hyperparameters.
+ */
+public final class HnswGraphBuilder {
+
+  // default random seed for level generation
+  private static final long DEFAULT_RAND_SEED = System.currentTimeMillis();
+
+  // expose for testing.
+  public static long randSeed = DEFAULT_RAND_SEED;
+
+  // These "default" hyperparameter settings are exposed (and nonfinal) to 
enable performance testing
+  // since the indexing API doesn't provide any control over them.
+
+  // default max connections per node
+  static int DEFAULT_MAX_CONN = 16;
+
+  // default candidate list size
+  static int DEFAULT_BEAM_WIDTH = 16;
+
+  private final int maxConn;
+  private final int beamWidth;
+  private final BoundedVectorValues boundedVectors;
+  private final VectorValues.SearchStrategy searchStrategy;
+  private final HnswGraph hnsw;
+  private final Random random;
+
+  /**
+   * Reads all the vectors from a VectorValues, builds a graph connecting them 
by their dense ordinals, using default
+   * hyperparameter settings, and returns the resulting graph.
+   * @param vectorValues the vectors whose relations are represented by the 
graph
+   */
+  public static HnswGraph build(VectorValues vectorValues) throws IOException {
+    HnswGraphBuilder builder = new 
HnswGraphBuilder(vectorValues.randomAccess());
+    return builder.build(vectorValues.randomAccess());
+  }
+
+  /**
+   * Reads all the vectors from a VectorValues, builds a graph connecting them 
by their dense ordinals, using the given
+   * hyperparameter settings, and returns the resulting graph.
+   * @param vectorValues the vectors whose relations are represented by the 
graph
+   * @param maxConn the number of connections to make when adding a new graph 
node; roughly speaking the graph fanout.
+   * @param beamWidth the size of the beam search to use when finding nearest 
neighbors.
+   * @param seed the seed for a random number generator used during graph 
construction. Provide this to ensure repeatable construction.
+   */
+  public static HnswGraph build(VectorValues vectorValues, int maxConn, int 
beamWidth, long seed) throws IOException {
+    HnswGraphBuilder builder = new 
HnswGraphBuilder(vectorValues.randomAccess(), maxConn, beamWidth, seed);
+    return builder.build(vectorValues.randomAccess());
+  }
+
+  /**
+   * Reads all the vectors from two copies of a random access VectorValues. 
Providing two copies enables efficient retrieval
+   * without extra data copying, while avoiding collision of the returned 
values.
+   * @param vectors the vectors for which to build a nearest neighbors graph. 
Must be an independet accessor for the vectors
+   */
+  private HnswGraph build(VectorValues.RandomAccess vectors) throws 
IOException {
+    for (int node = 1; node < vectors.size(); node++) {
+      insert(vectors.vectorValue(node));
+    }
+    return hnsw;
+  }
+
+  /** Construct the builder with default configurations */
+  private HnswGraphBuilder(VectorValues.RandomAccess vectors) {
+    this(vectors, DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, randSeed);
+  }
+
+  /** Full constructor */
+  private HnswGraphBuilder(VectorValues.RandomAccess vectors, int maxConn, int 
beamWidth, long seed) {
+    searchStrategy = vectors.searchStrategy();
+    if (searchStrategy == VectorValues.SearchStrategy.NONE) {
+      throw new IllegalStateException("No distance function");
+    }
+    this.maxConn = maxConn;
+    this.beamWidth = beamWidth;
+    boundedVectors = new BoundedVectorValues(vectors);
+    this.hnsw = new HnswGraph();
+    random = new Random(seed);

Review comment:
       *PREDICTABLE_RANDOM:*  This random generator (java.util.Random) is 
predictable 
[(details)](https://find-sec-bugs.github.io/bugs.htm#PREDICTABLE_RANDOM)




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to