mikemccand commented on issue #13880: URL: https://github.com/apache/lucene/issues/13880#issuecomment-2402543191
Hmm, well, I coded that up, on top of my PR from #13874, with this diff: ``` raptorlake:912x[fix_back_compat]$ git diff diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestInt8HnswBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_in\ dex/TestInt8HnswBackwardsCompatibility.java index d4bb2d8b24f..321811b7dc9 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestInt8HnswBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestInt8HnswBackwardsCompatibility.java @@ -23,17 +23,22 @@ import java.io.IOException; import org.apache.lucene.backward_codecs.lucene99.Lucene99Codec; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsReader; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.KnnFloatVectorField; import org.apache.lucene.document.StringField; +import org.apache.lucene.index.CodecReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.IndexSearcher; @@ -145,4 +150,19 @@ public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTe assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0"); } } + + // #13880: make sure the BWC index really contains quantized HNSW not float32 + public void testIndexIsReallyQuantized() throws Exception { + try (DirectoryReader reader = DirectoryReader.open(directory)) { + for (LeafReaderContext leafContext : reader.leaves()) { + KnnVectorsReader knnVectorsReader = ((CodecReader) leafContext.reader()).getVectorReader(); + assertTrue("expected PerFieldKnnVectorsFormat.FieldsReader but got: " + knnVectorsReader, + knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader); + + KnnVectorsReader forField = ((PerFieldKnnVectorsFormat.FieldsReader) knnVectorsReader).getFieldReader(KNN_VECTOR_FIELD); + assertTrue("KnnVectorsReader should be quantized, but got: " + forField, + forField instanceof Lucene99ScalarQuantizedVectorsReader); + } + } + } } ``` I wanted to confirm the test will pass with that PR that fixed the BWC indices to actually use quantized HNSW. But it fails! With exceptions like this: ``` 2> NOTE: reproduce with: gradlew test --tests TestInt8HnswBackwardsCompatibility.testIndexIsQuantized -Dtests.seed=848561A0021D6FE4 -Dtests.locale=en-JM -Dtests.timezone=Chile/Con\ tinental -Dtests.asserts=true -Dtests.file.encoding=UTF-8 > java.lang.AssertionError: KnnVectorsReader should be quantized, but got: org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader@425e674f > at __randomizedtesting.SeedInfo.seed([848561A0021D6FE4:B93E033BC6EAF741]:0) > at junit@4.13.1/org.junit.Assert.fail(Assert.java:89) > at junit@4.13.1/org.junit.Assert.assertTrue(Assert.java:42) > at org.apache.lucene.backward_index.TestInt8HnswBackwardsCompatibility.testIndexIsQuantized(TestInt8HnswBackwardsCompatibility.java:163) > at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.base/java.lang.reflect.Method.invoke(Method.java:566) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1758) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:946) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:982) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:996) > at org.apache.lucene.test_framework@9.12.0-SNAPSHOT/org.apache.lucene.tests.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:48) > at org.apache.lucene.test_framework@9.12.0-SNAPSHOT/org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43) > at org.apache.lucene.test_framework@9.12.0-SNAPSHOT/org.apache.lucene.tests.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:45) > at org.apache.lucene.test_framework@9.12.0-SNAPSHOT/org.apache.lucene.tests.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:60) > at org.apache.lucene.test_framework@9.12.0-SNAPSHOT/org.apache.lucene.tests.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:44) > at junit@4.13.1/org.junit.rules.RunRules.evaluate(RunRules.java:20) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:390) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:843) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:490) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:955) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:840) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:891) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:902) > at org.apache.lucene.test_framework@9.12.0-SNAPSHOT/org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) > at org.apache.lucene.test_framework@9.12.0-SNAPSHOT/org.apache.lucene.tests.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:38) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) > at org.apache.lucene.test_framework@9.12.0-SNAPSHOT/org.apache.lucene.tests.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:53) > at org.apache.lucene.test_framework@9.12.0-SNAPSHOT/org.apache.lucene.tests.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:43) > at org.apache.lucene.test_framework@9.12.0-SNAPSHOT/org.apache.lucene.tests.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:44) > at org.apache.lucene.test_framework@9.12.0-SNAPSHOT/org.apache.lucene.tests.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:60) > at org.apache.lucene.test_framework@9.12.0-SNAPSHOT/org.apache.lucene.tests.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:47) > at junit@4.13.1/org.junit.rules.RunRules.evaluate(RunRules.java:20) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:390) > at randomizedtesting.runner@2.8.1/com.carrotsearch.randomizedtesting.ThreadLeakControl.lambda$forkTimeoutingTask$0(ThreadLeakControl.java:850) > at java.base/java.lang.Thread.run(Thread.java:829) ``` [Aside: it's annoying that the `Reproduce with:` line does not have specifics about which of the parameterized versions it ran with ... this seems like a bug in Randomizedtesting? CC @dweiss. I must scroll up to see which zip file it is testing...] So now I am confused ... I need to take a break (I'm at Community Over Code in Denver, and first session is about to start!!). Maybe after the break I'll mull and understand why the new test case is failing on the zip files that I thought I had fixed!! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org