stefanvodita commented on code in PR #12548:
URL: https://github.com/apache/lucene/pull/12548#discussion_r1341952008


##########
lucene/core/src/test/org/apache/lucene/search/TestVectorSimilarityValuesSource.java:
##########
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.KnnByteVectorField;
+import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.analysis.MockAnalyzer;
+import org.apache.lucene.tests.index.RandomIndexWriter;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+public class TestVectorSimilarityValuesSource extends LuceneTestCase {
+  private static Directory dir;
+  private static Analyzer analyzer;
+  private static IndexReader reader;
+  private static IndexSearcher searcher;
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    dir = newDirectory();
+    analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwConfig = newIndexWriterConfig(analyzer);
+    iwConfig.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConfig);
+
+    Document document = new Document();
+    document.add(new StringField("id", "1", Field.Store.NO));
+    document.add(new SortedDocValuesField("id", new BytesRef("1")));
+    document.add(new KnnFloatVectorField("knnFloatField1", new float[] {1.f, 
2.f, 3.f}));
+    document.add(
+        new KnnFloatVectorField(
+            "knnFloatField2",
+            new float[] {2.2f, -3.2f, -3.1f},
+            VectorSimilarityFunction.DOT_PRODUCT));
+    document.add(
+        new KnnFloatVectorField(
+            "knnFloatField3", new float[] {4.5f, 10.3f, -7.f}, 
VectorSimilarityFunction.COSINE));
+    document.add(
+        new KnnFloatVectorField(
+            "knnFloatField4",
+            new float[] {-1.3f, 1.0f, 1.0f},
+            VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT));
+    document.add(new KnnFloatVectorField("knnFloatField5", new float[] {-6.7f, 
-1.0f, -0.9f}));
+    document.add(new KnnByteVectorField("knnByteField1", new byte[] {106, 80, 
127}));
+    document.add(
+        new KnnByteVectorField(
+            "knnByteField2", new byte[] {4, 2, 3}, 
VectorSimilarityFunction.DOT_PRODUCT));
+    document.add(
+        new KnnByteVectorField(
+            "knnByteField3", new byte[] {-121, -64, -1}, 
VectorSimilarityFunction.COSINE));
+    document.add(
+        new KnnByteVectorField(
+            "knnByteField4",
+            new byte[] {-127, 127, 127},
+            VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT));
+    iw.addDocument(document);
+
+    Document document2 = new Document();
+    document2.add(new StringField("id", "2", Field.Store.NO));
+    document2.add(new SortedDocValuesField("id", new BytesRef("2")));
+    document2.add(new KnnFloatVectorField("knnFloatField1", new float[] {1.f, 
2.f, 3.f}));
+    document2.add(
+        new KnnFloatVectorField(
+            "knnFloatField2",
+            new float[] {-5.2f, 8.7f, 3.1f},
+            VectorSimilarityFunction.DOT_PRODUCT));
+    document2.add(
+        new KnnFloatVectorField(
+            "knnFloatField3", new float[] {0.2f, -3.2f, 3.1f}, 
VectorSimilarityFunction.COSINE));
+    document2.add(new KnnFloatVectorField("knnFloatField5", new float[] {2.f, 
13.2f, 9.1f}));
+    document2.add(new KnnByteVectorField("knnByteField1", new byte[] {1, -2, 
-30}));
+    document2.add(
+        new KnnByteVectorField(
+            "knnByteField2", new byte[] {40, 21, 3}, 
VectorSimilarityFunction.DOT_PRODUCT));
+    document2.add(
+        new KnnByteVectorField(
+            "knnByteField3", new byte[] {9, 2, 3}, 
VectorSimilarityFunction.COSINE));
+    document2.add(
+        new KnnByteVectorField(
+            "knnByteField4",
+            new byte[] {14, 29, 31},
+            VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT));
+    iw.addDocument(document2);
+
+    Document document3 = new Document();
+    document3.add(new StringField("id", "3", Field.Store.NO));
+    document3.add(new SortedDocValuesField("id", new BytesRef("3")));
+    document3.add(new KnnFloatVectorField("knnFloatField1", new float[] {1.f, 
2.f, 3.f}));
+    document3.add(
+        new KnnFloatVectorField(
+            "knnFloatField2", new float[] {-8.f, 7.f, -6.f}, 
VectorSimilarityFunction.DOT_PRODUCT));
+    document3.add(new KnnFloatVectorField("knnFloatField5", new float[] {5.2f, 
3.2f, 3.1f}));
+    document3.add(new KnnByteVectorField("knnByteField1", new byte[] {-128, 0, 
127}));
+    document3.add(
+        new KnnByteVectorField(
+            "knnByteField2", new byte[] {-1, -2, -3}, 
VectorSimilarityFunction.DOT_PRODUCT));
+    document3.add(
+        new KnnByteVectorField(
+            "knnByteField3", new byte[] {4, 2, 3}, 
VectorSimilarityFunction.COSINE));
+    document3.add(
+        new KnnByteVectorField(
+            "knnByteField4",
+            new byte[] {-4, -2, -128},
+            VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT));
+    document3.add(new KnnByteVectorField("knnByteField5", new byte[] {-120, 
-2, 3}));
+    iw.addDocument(document3);
+
+    reader = iw.getReader();
+    searcher = newSearcher(reader);
+    iw.close();
+  }
+
+  @AfterClass
+  public static void afterClass() throws Exception {

Review Comment:
   You've previously committed a 
[change](https://github.com/apache/lucene/commit/368dbffef3d2d44256fe63d4f026e76e211d9eb6)
 replacing many separate `.close()` calls with `IOUtils.close()`. Why not use 
that here?



##########
lucene/core/src/java/org/apache/lucene/search/FloatVectorSimilarityValuesSource.java:
##########
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Objects;
+import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.VectorSimilarityFunction;
+
+/**
+ * A {@link DoubleValuesSource} which computes the vector similarity scores 
between the query vector
+ * and the {@link org.apache.lucene.document.KnnFloatVectorField} for 
documents.
+ */
+class FloatVectorSimilarityValuesSource extends DoubleValuesSource {

Review Comment:
   This and `ByteVectorSimilarityValuesSource` look very similar. Should they 
inherit from a `VectorSimilarityValuesSource` to ensure we can add common 
functionality in the future?



##########
lucene/core/src/java/org/apache/lucene/search/DoubleValuesSource.java:
##########
@@ -172,6 +173,40 @@ public LongValuesSource rewrite(IndexSearcher searcher) 
throws IOException {
     }
   }
 
+  /**
+   * Returns a DoubleValues instance for computing the vector similarity score 
per document against
+   * the byte query vector
+   *
+   * @param ctx the context for which to return the DoubleValues
+   * @param queryVector byte query vector
+   * @param vectorField knn byte field name
+   * @return DoubleValues instance
+   * @throws IOException if an {@link IOException} occurs
+   */
+  public static DoubleValues similarityToQueryVector(
+      LeafReaderContext ctx, byte[] queryVector, String vectorField) throws 
IOException {
+    assert 
ctx.reader().getFieldInfos().fieldInfo(vectorField).getVectorEncoding()

Review Comment:
   What do you think of throwing an exception even when assertions are not 
enabled?



##########
lucene/core/src/java/org/apache/lucene/search/ByteVectorSimilarityValuesSource.java:
##########
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Objects;
+import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.VectorSimilarityFunction;
+
+/**
+ * A {@link DoubleValuesSource} which computes the vector similarity scores 
between the query vector
+ * and the {@link org.apache.lucene.document.KnnByteVectorField} for documents.
+ */
+class ByteVectorSimilarityValuesSource extends DoubleValuesSource {
+  private final byte[] queryVector;
+  private final String fieldName;
+
+  public ByteVectorSimilarityValuesSource(byte[] vector, String fieldName) {
+    this.queryVector = vector;
+    this.fieldName = fieldName;
+  }
+
+  @Override
+  public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) 
throws IOException {
+    final ByteVectorValues vectorValues = 
ctx.reader().getByteVectorValues(fieldName);
+    VectorSimilarityFunction function =
+        
ctx.reader().getFieldInfos().fieldInfo(fieldName).getVectorSimilarityFunction();
+    return new DoubleValues() {
+      @Override
+      public double doubleValue() throws IOException {
+        return function.compare(queryVector, vectorValues.vectorValue());
+      }
+
+      @Override
+      public boolean advanceExact(int doc) throws IOException {
+        if (doc >= vectorValues.docID()

Review Comment:
   This is equivalent to `return doc >= vectorValues.docID() && 
(vectorValues.docID() == doc || vectorValues.advance(doc) == doc);`, which 
avoids the branch.



##########
lucene/core/src/test/org/apache/lucene/search/TestVectorSimilarityValuesSource.java:
##########
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.KnnByteVectorField;
+import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.analysis.MockAnalyzer;
+import org.apache.lucene.tests.index.RandomIndexWriter;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+public class TestVectorSimilarityValuesSource extends LuceneTestCase {

Review Comment:
   Thank you for adding a thorough test!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to