irvingzhang commented on a change in pull request #1169: LUCENE-9004: A minor feature and patch -- support deleting vector values and fix segments merging URL: https://github.com/apache/lucene-solr/pull/1169#discussion_r368349044
########## File path: lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java ########## @@ -92,7 +108,277 @@ public void testSingleDocRecall() throws Exception { iw.commit(); assertConsistentGraph(iw, values); - assertRecall(dir, 0, values[0]); + assertRecall(dir, 1, values[0]); + } + } + + public void testDocsDeletionAndRecall() throws Exception { + /** + * {@code KnnExactVectorValueWeight} applies in-set (i.e. the query vector is exactly in the index) + * deletion strategy to filter all unmatched results searched by {@link org.apache.lucene.search.KnnGraphQuery.KnnExactVectorValueQuery}, + * and deletes at most ef*segmentCnt vectors that are the same to the specified queryVector. + */ + final class KnnExactVectorValueWeight extends ConstantScoreWeight { + private final String field; + private final ScoreMode scoreMode; + private final float[] queryVector; + private final int ef; + + KnnExactVectorValueWeight(Query query, float score, ScoreMode scoreMode, String field, float[] queryVector, int ef) { + super(query, score); + this.field = field; + this.scoreMode = scoreMode; + this.queryVector = queryVector; + this.ef = ef; + } + + /** + * Returns a {@link Scorer} which can iterate in order over all matching + * documents and assign them a score. + * <p> + * <b>NOTE:</b> null can be returned if no documents will be scored by this + * query. + * <p> + * <b>NOTE</b>: The returned {@link Scorer} does not have + * {@link LeafReader#getLiveDocs()} applied, they need to be checked on top. + * + * @param context the {@link LeafReaderContext} for which to return the {@link Scorer}. + * @return a {@link Scorer} which scores documents in/out-of order. 
+ * @throws IOException if there is a low-level I/O error + */ + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier supplier = scorerSupplier(context); + if (supplier == null) { + return null; + } + return supplier.get(Long.MAX_VALUE); + } + + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field); + int numDimensions = fi.getVectorNumDimensions(); + if (numDimensions != queryVector.length) { + throw new IllegalArgumentException("field=\"" + field + "\" was indexed with dimensions=" + numDimensions + + "; this is incompatible with query dimensions=" + queryVector.length); + } + + final HNSWGraphReader hnswReader = new HNSWGraphReader(field, context); + final VectorValues vectorValues = context.reader().getVectorValues(field); + if (vectorValues == null) { + // No docs in this segment/field indexed any vector values + return null; + } + + final Weight weight = this; + return new ScorerSupplier() { + @Override + public Scorer get(long leadCost) throws IOException { + final Neighbors neighbors = hnswReader.searchNeighbors(queryVector, ef, vectorValues); + + if (neighbors.size() > 0) { + Neighbor top = neighbors.top(); + if (top.distance() > 0) { + neighbors.clear(); + } else { + final List<Neighbor> toDeleteNeighbors = new ArrayList<>(neighbors.size()); Review comment: Yes, and thanks. I want to test some cases where segments contain deleted vectors. I expect the deleted vector values to be deterministic, so that the assertions hold on every run. The classes KnnExactVectorValueQuery and KnnExactVectorValueWeight are used only by my test case, so I put them in the test file. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org