jmazanec15 commented on code in PR #12050: URL: https://github.com/apache/lucene/pull/12050#discussion_r1061902422
########## lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java: ########## @@ -461,6 +467,126 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE } } + private void maybeInitializeFromGraph( + HnswGraphBuilder<?> hnswGraphBuilder, MergeState mergeState, FieldInfo fieldInfo) + throws IOException { + int initializerIndex = selectGraphForInitialization(mergeState, fieldInfo); + if (initializerIndex == -1) { + return; + } + + HnswGraph initializerGraph = + getHnswGraphFromReader(fieldInfo.name, mergeState.knnVectorsReaders[initializerIndex]); + Map<Integer, Integer> ordinalMapper = + getOldToNewOrdinalMap(mergeState, fieldInfo, initializerIndex); + hnswGraphBuilder.initializeFromGraph(initializerGraph, ordinalMapper); + } + + private int selectGraphForInitialization(MergeState mergeState, FieldInfo fieldInfo) + throws IOException { + // Find the KnnVectorReader with the most docs that meets the following criteria: + // 1. Does not contain any deleted docs + // 2. Is a Lucene95HnswVectorsReader/PerFieldKnnVectorReader + // If no readers exist that meet this criteria, return -1. If they do, return their index in + // merge state + int maxCandidateVectorCount = 0; + int initializerIndex = -1; + + for (int i = 0; i < mergeState.liveDocs.length; i++) { + KnnVectorsReader currKnnVectorsReader = mergeState.knnVectorsReaders[i]; + if (mergeState.knnVectorsReaders[i] + instanceof PerFieldKnnVectorsFormat.FieldsReader candidateReader) { + currKnnVectorsReader = candidateReader.getFieldReader(fieldInfo.name); + } + + if (!allMatch(mergeState.liveDocs[i]) + || !(currKnnVectorsReader instanceof Lucene95HnswVectorsReader candidateReader)) { + continue; + } + + VectorValues vectorValues = candidateReader.getVectorValues(fieldInfo.name); + if (vectorValues == null) { + continue; + } + + int candidateVectorCount = vectorValues.size(); + if (candidateVectorCount > maxCandidateVectorCount) { + maxCandidateVectorCount = candidateVectorCount; + initializerIndex = i; + } + } + return initializerIndex; + } + + private HnswGraph getHnswGraphFromReader(String fieldName, KnnVectorsReader knnVectorsReader) + throws IOException { + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader perFieldReader + && perFieldReader.getFieldReader(fieldName) + instanceof Lucene95HnswVectorsReader fieldReader) { + return fieldReader.getGraph(fieldName); + } + + if (knnVectorsReader instanceof Lucene95HnswVectorsReader) { + return ((Lucene95HnswVectorsReader) knnVectorsReader).getGraph(fieldName); + } + Review Comment: Good idea, I will add this comment here -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org