uschindler commented on a change in pull request #1397: LUCENE-9304: Refactor DWPTPool to pool DWPT directly URL: https://github.com/apache/lucene-solr/pull/1397#discussion_r404610773
########## File path: lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java ########## @@ -16,228 +16,173 @@ */ package org.apache.lucene.index; +import java.io.Closeable; +import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; +import java.util.IdentityHashMap; +import java.util.Iterator; import java.util.List; -import java.util.concurrent.locks.ReentrantLock; +import java.util.Set; +import java.util.function.Predicate; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.util.IOSupplier; import org.apache.lucene.util.ThreadInterruptedException; /** - * {@link DocumentsWriterPerThreadPool} controls {@link ThreadState} instances - * and their thread assignments during indexing. Each {@link ThreadState} holds - * a reference to a {@link DocumentsWriterPerThread} that is once a - * {@link ThreadState} is obtained from the pool exclusively used for indexing a - * single document by the obtaining thread. Each indexing thread must obtain - * such a {@link ThreadState} to make progress. Depending on the - * {@link DocumentsWriterPerThreadPool} implementation {@link ThreadState} + * {@link DocumentsWriterPerThreadPool} controls {@link DocumentsWriterPerThread} instances + * and their thread assignments during indexing. Each {@link DocumentsWriterPerThread} is once a + * obtained from the pool exclusively used for indexing a + * single document or list of documents by the obtaining thread. Each indexing thread must obtain + * such a {@link DocumentsWriterPerThread} to make progress. Depending on the + * {@link DocumentsWriterPerThreadPool} implementation {@link DocumentsWriterPerThread} * assignments might differ from document to document. * <p> - * Once a {@link DocumentsWriterPerThread} is selected for flush the thread pool - * is reusing the flushing {@link DocumentsWriterPerThread}s ThreadState with a - * new {@link DocumentsWriterPerThread} instance. + * Once a {@link DocumentsWriterPerThread} is selected for flush the {@link DocumentsWriterPerThread} will + * be checked out of the thread pool and won't be reused for indexing. See {@link #checkout(DocumentsWriterPerThread)}. * </p> */ -final class DocumentsWriterPerThreadPool { - - /** - * {@link ThreadState} references and guards a - * {@link DocumentsWriterPerThread} instance that is used during indexing to - * build a in-memory index segment. {@link ThreadState} also holds all flush - * related per-thread data controlled by {@link DocumentsWriterFlushControl}. - * <p> - * A {@link ThreadState}, its methods and members should only accessed by one - * thread a time. Users must acquire the lock via {@link ThreadState#lock()} - * and release the lock in a finally block via {@link ThreadState#unlock()} - * before accessing the state. - */ - @SuppressWarnings("serial") - final static class ThreadState extends ReentrantLock { - DocumentsWriterPerThread dwpt; - // TODO this should really be part of DocumentsWriterFlushControl - // write access guarded by DocumentsWriterFlushControl - volatile boolean flushPending = false; - // TODO this should really be part of DocumentsWriterFlushControl - // write access guarded by DocumentsWriterFlushControl - long bytesUsed = 0; - - // set by DocumentsWriter after each indexing op finishes - volatile long lastSeqNo; - - ThreadState(DocumentsWriterPerThread dpwt) { - this.dwpt = dpwt; - } - - private void reset() { - assert this.isHeldByCurrentThread(); - this.dwpt = null; - this.bytesUsed = 0; - this.flushPending = false; - } - - boolean isInitialized() { - assert this.isHeldByCurrentThread(); - return dwpt != null; - } - - /** - * Returns the number of currently active bytes in this ThreadState's - * {@link DocumentsWriterPerThread} - */ - public long getBytesUsedPerThread() { - assert this.isHeldByCurrentThread(); - // public for FlushPolicy - return bytesUsed; - } - - /** - * Returns this {@link ThreadState}s {@link DocumentsWriterPerThread} - */ - public DocumentsWriterPerThread getDocumentsWriterPerThread() { - assert this.isHeldByCurrentThread(); - // public for FlushPolicy - return dwpt; - } - - /** - * Returns <code>true</code> iff this {@link ThreadState} is marked as flush - * pending otherwise <code>false</code> - */ - public boolean isFlushPending() { - return flushPending; - } - } +final class DocumentsWriterPerThreadPool implements Iterable<DocumentsWriterPerThread>, Closeable { - private final List<ThreadState> threadStates = new ArrayList<>(); + private final Set<DocumentsWriterPerThread> dwpts = Collections.newSetFromMap(new IdentityHashMap<>()); + private final List<DocumentsWriterPerThread> freeList = new ArrayList<>(); Review comment: Just an idea: Maybe use `Deque<DocumentsWriterPerThread> freeList = new ArrayDeque<>();` here, as it allows to iterate in both directions (it has `descendingIterator()`). To me this also looks better, because the whole thing is mostly used as a deque (LIFO). ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org