vigyasharma commented on code in PR #13951: URL: https://github.com/apache/lucene/pull/13951#discussion_r1851338572
########## lucene/core/src/java/org/apache/lucene/index/IndexWriterRAMManager.java: ########## @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.ReentrantLock; + +/** + * For managing multiple instances of {@link IndexWriter} sharing the same buffer (configured by + * {@link IndexWriterConfig#setRAMBufferSizeMB}) + */ +public class IndexWriterRAMManager { + private final LinkedIdToWriter idToWriter = new LinkedIdToWriter(); + private final AtomicInteger idGenerator = new AtomicInteger(); + private double ramBufferSizeMB; + + /** + * Default constructor + * + * @param ramBufferSizeMB the RAM buffer size to use between all registered {@link IndexWriter} + * instances + */ + IndexWriterRAMManager(double ramBufferSizeMB) { + if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) { + throw new IllegalArgumentException("ramBufferSize should be > 0.0 MB when enabled"); + } + this.ramBufferSizeMB = ramBufferSizeMB; + } + + /** Set the buffer size for this manager */ + public void 
setRamBufferSizeMB(double ramBufferSizeMB) { + this.ramBufferSizeMB = ramBufferSizeMB; + } + + /** Get the buffer size assigned to this manager */ + public double getRamBufferSizeMB() { + return ramBufferSizeMB; + } + + /** + * Calls {@link IndexWriter#flushNextBuffer()} in a round-robin fashion starting from the first + * writer added that has not been removed yet. Subsequent calls will flush the next writer in line + * and eventually loop back to the beginning. Returns the flushed writer id for testing + */ + public int flushRoundRobin() throws IOException { + return idToWriter.flushRoundRobin(); + } + + /** Registers a writer and returns the associated ID, protected for testing */ + protected int registerWriter(IndexWriter writer) { + int id = idGenerator.incrementAndGet(); + idToWriter.addWriter(writer, id); + return id; + } + + /** Removes a writer given the writer's ide, protected for testing */ + protected void removeWriter(int id) { + idToWriter.removeWriter(id); + } + + private void flushIfNecessary( + FlushPolicy flushPolicy, PerWriterIndexWriterRAMManager perWriterRAMManager) + throws IOException { + if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) { + flushPolicy.flushWriter(this, perWriterRAMManager); + } + } + + private long updateAndGetCurrentBytesUsed(int id) { + return idToWriter.getTotalRamTracker(id); + } + + /** + * For use in {@link IndexWriter}, manages communication with the {@link IndexWriterRAMManager} + */ + public static class PerWriterIndexWriterRAMManager { Review Comment: Do we really need this class? `IndexWriter` (and `FlushPolicy` too) should already have a reference to the ram manager via `IndexWriterConfig`. How about we just store the writer's "ramManagerID" inside `IndexWriter`, and use it to invoke ram manager APIs directly? In fact, can we work with the writer object directly instead of keeping these "id" mappings? Similar to how FlushPolicy accepts the calling DWPT in its APIs... 
########## lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java: ########## @@ -142,7 +142,21 @@ public IndexWriterConfig() { * problem you should switch to {@link LogByteSizeMergePolicy} or {@link LogDocMergePolicy}. */ public IndexWriterConfig(Analyzer analyzer) { - super(analyzer); + this(analyzer, new IndexWriterRAMManager(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB)); Review Comment: Any reason for not making this change in the default constructor? We could avoid making changes to all the tests. ########## lucene/core/src/java/org/apache/lucene/index/IndexWriterRAMManager.java: ########## @@ -0,0 +1,217 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.ReentrantLock; + +/** + * For managing multiple instances of {@link IndexWriter} sharing the same buffer (configured by + * {@link IndexWriterConfig#setRAMBufferSizeMB}) + */ +public class IndexWriterRAMManager { + private final LinkedIdToWriter idToWriter = new LinkedIdToWriter(); + private final AtomicInteger idGenerator = new AtomicInteger(); + private double ramBufferSizeMB; + + /** + * Default constructor + * + * @param ramBufferSizeMB the RAM buffer size to use between all registered {@link IndexWriter} + * instances + */ + IndexWriterRAMManager(double ramBufferSizeMB) { + if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) { + throw new IllegalArgumentException("ramBufferSize should be > 0.0 MB when enabled"); + } + this.ramBufferSizeMB = ramBufferSizeMB; + } + + /** Set the buffer size for this manager */ + public void setRamBufferSizeMB(double ramBufferSizeMB) { + this.ramBufferSizeMB = ramBufferSizeMB; + } + + /** Get the buffer size assigned to this manager */ + public double getRamBufferSizeMB() { + return ramBufferSizeMB; + } + + /** + * Calls {@link IndexWriter#flushNextBuffer()} in a round-robin fashion starting from the first + * writer added that has not been removed yet. Subsequent calls will flush the next writer in line + * and eventually loop back to the beginning. 
Returns the flushed writer id for testing + */ + public int flushRoundRobin() throws IOException { + return idToWriter.flushRoundRobin(); + } + + /** Registers a writer and returns the associated ID, protected for testing */ + protected int registerWriter(IndexWriter writer) { + int id = idGenerator.incrementAndGet(); + idToWriter.addWriter(writer, id); + return id; + } + + /** Removes a writer given the writer's ide, protected for testing */ + protected void removeWriter(int id) { + idToWriter.removeWriter(id); + } + + private void flushIfNecessary( + FlushPolicy flushPolicy, PerWriterIndexWriterRAMManager perWriterRAMManager) + throws IOException { + if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) { + flushPolicy.flushWriter(this, perWriterRAMManager); + } + } + + private long updateAndGetCurrentBytesUsed(int id) { + return idToWriter.getTotalRamTracker(id); + } + + /** + * For use in {@link IndexWriter}, manages communication with the {@link IndexWriterRAMManager} + */ + public static class PerWriterIndexWriterRAMManager { + + private final int id; + private final IndexWriterRAMManager manager; + + PerWriterIndexWriterRAMManager(IndexWriter writer, IndexWriterRAMManager manager) { + id = manager.registerWriter(writer); + this.manager = manager; + } + + void removeWriter() { + manager.removeWriter(id); + } + + void flushIfNecessary(FlushPolicy flushPolicy) throws IOException { + manager.flushIfNecessary(flushPolicy, this); + } + + long getTotalBufferBytesUsed() { + return manager.updateAndGetCurrentBytesUsed(id); + } + } + + private static class LinkedIdToWriter { Review Comment: How about using a Java Queue implementation instead of the custom linked-list logic? You could round-robin over the elements by removing each one, processing it, and adding it back to the queue. I suppose this queue size would be small, so an array deque and a linked list are both fine? We can also get some thread-safe implementations out of the box. 
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org