zhaih commented on code in PR #12660: URL: https://github.com/apache/lucene/pull/12660#discussion_r1367955272
########## lucene/core/src/java/org/apache/lucene/util/hnsw/HnswConcurrentMergeBuilder.java: ########## @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util.hnsw; + +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; +import static org.apache.lucene.util.hnsw.HnswGraphBuilder.HNSW_COMPONENT; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.util.BitSet; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.ThreadInterruptedException; + +/** + * A graph builder that manages multiple workers, it only supports adding the whole graph all at + * once. It will spawn a thread for each worker and the workers will pick the work in batches. + */ +public class HnswConcurrentMergeBuilder implements IHnswGraphBuilder { + + private static final int BATCH_SIZE = 2048; + + private final ExecutorService exec; + private final ConcurrentMergeWorker[] workers; + private InfoStream infoStream = InfoStream.getDefault(); + + public HnswConcurrentMergeBuilder( + ExecutorService exec, + int numWorker, + RandomVectorScorerSupplier scorerSupplier, + int M, + int beamWidth, + OnHeapHnswGraph hnsw, + BitSet initializedNodes) + throws IOException { + this.exec = exec; + AtomicInteger workProgress = new AtomicInteger(0); + workers = new ConcurrentMergeWorker[numWorker]; + for (int i = 0; i < numWorker; i++) { + workers[i] = + new ConcurrentMergeWorker( + scorerSupplier.copy(), + M, + beamWidth, + HnswGraphBuilder.randSeed, + hnsw, + initializedNodes, + workProgress); + } + } + + @Override + public OnHeapHnswGraph build(int maxOrd) throws IOException { + if (infoStream.isEnabled(HNSW_COMPONENT)) { + infoStream.message( + HNSW_COMPONENT, + "build graph from " + maxOrd + " vectors, with " + workers.length + " workers"); + } + List<Future<?>> futures = new ArrayList<>(); + for (int i = 0; i < workers.length; i++) { + int finalI = i; + futures.add( + exec.submit( + () -> { + try { + workers[finalI].run(maxOrd); + } catch (IOException e) { + throw new RuntimeException(e); + } + })); + } + Throwable exc = null; + for (Future<?> future : futures) { + try { + future.get(); + } catch (InterruptedException e) { + var newException = new ThreadInterruptedException(e); + if (exc == null) { + exc = newException; + } else { + exc.addSuppressed(newException); + } + } catch (ExecutionException e) { + if (exc == null) { + exc = e.getCause(); + } else { + exc.addSuppressed(e.getCause()); + } + } + } + if (exc != null) { + // The error handling was copied from TaskExecutor. should we just use TaskExecutor instead? + throw IOUtils.rethrowAlways(exc); + } + return workers[0].getGraph(); + } + + @Override + public void addGraphNode(int node) throws IOException { + throw new UnsupportedOperationException("This builder is for merge only"); + } + + @Override + public void setInfoStream(InfoStream infoStream) { + this.infoStream = infoStream; + for (IHnswGraphBuilder worker : workers) { + worker.setInfoStream(infoStream); + } + } + + @Override + public OnHeapHnswGraph getGraph() { + return workers[0].getGraph(); + } + + private static final class ConcurrentMergeWorker extends HnswGraphBuilder { + + private final AtomicInteger workProgress; + private final BitSet initializedNodes; + + private ConcurrentMergeWorker( + RandomVectorScorerSupplier scorerSupplier, + int M, + int beamWidth, + long seed, + OnHeapHnswGraph hnsw, + BitSet initializedNodes, + AtomicInteger workProgress) + throws IOException { + super( + scorerSupplier, + M, + beamWidth, + seed, + hnsw, + new MergeSearcher( + new NeighborQueue(beamWidth, true), new FixedBitSet(hnsw.maxNodeId() + 1))); + this.workProgress = workProgress; + this.initializedNodes = initializedNodes; + } + + private void run(int maxOrd) throws IOException { + int start = getStartPos(maxOrd); + int end; + while (start != -1) { + end = Math.min(maxOrd, start + BATCH_SIZE); + addVectors(start, end); + start = getStartPos(maxOrd); + } + } + + private int getStartPos(int maxOrd) { + int start = workProgress.get(); + while (start < maxOrd) { + if (workProgress.compareAndSet(start, start + BATCH_SIZE)) { + break; + } else { + assert workProgress.get() > start; + start = workProgress.get(); + } + } + if (start < maxOrd) { + return start; + } else { + return -1; + } + } + + @Override + public void addGraphNode(int node) throws IOException { + if (initializedNodes != null && initializedNodes.get(node)) { + return; + } + super.addGraphNode(node); + } + } + + /** + * This searcher will obtain the lock and make a copy of neighborArray when seeking the graph such + * that concurrent modification of the graph will not impact the search + */ + private static class MergeSearcher extends HnswGraphSearcher { + private int[] nodeBuffer; + private int upto; + private int size; + + private MergeSearcher(NeighborQueue candidates, BitSet visited) { + super(candidates, visited); + } + + @Override + void graphSeek(HnswGraph graph, int level, int targetNode) { + NeighborArray neighborArray = ((OnHeapHnswGraph) graph).getNeighbors(level, targetNode); + long start = System.nanoTime(); + neighborArray.rwlock.readLock().lock(); + NeighborArray.contentionTime.addAndGet(System.nanoTime() - start); + if (nodeBuffer == null || nodeBuffer.length < neighborArray.size()) { + nodeBuffer = new int[neighborArray.size()]; + } + size = neighborArray.size(); + if (size >= 0) System.arraycopy(neighborArray.node, 0, nodeBuffer, 0, size); + neighborArray.rwlock.readLock().unlock(); Review Comment: Yeah let me do some experiment -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org