jpountz commented on code in PR #13951: URL: https://github.com/apache/lucene/pull/13951#discussion_r1815149823
########## lucene/core/src/java/org/apache/lucene/index/IndexWriterRAMManager.java: ########## @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * For managing multiple instances of {@link IndexWriter} sharing the same buffer (configured by + * {@link IndexWriterConfig#setRAMBufferSizeMB}) Review Comment: It should be the other way around in my opinion, the RAM buffer size should be on `IndexWriterRAMManager`, and setting a `ramBufferSizeMB` on `IndexWriterConfig` would internally create a new `IndexWriterRAMManager` under the hood that is shared with no other `IndexWriter`. ########## lucene/core/src/java/org/apache/lucene/index/IndexWriterRAMManager.java: ########## @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * For managing multiple instances of {@link IndexWriter} sharing the same buffer (configured by + * {@link IndexWriterConfig#setRAMBufferSizeMB}) + */ +public class IndexWriterRAMManager { + + private final IndexWriterConfig config; + private final Map<Integer, IndexWriter> idToWriter = new ConcurrentHashMap<>(); + private final AtomicInteger idGenerator = new AtomicInteger(); + + /** + * Default constructor + * + * @param config the index writer config containing the max RAM buffer size + */ + public IndexWriterRAMManager(IndexWriterConfig config) { + this.config = config; + } + + private int registerWriter(IndexWriter writer) { + int id = idGenerator.incrementAndGet(); + idToWriter.put(id, writer); + return id; + } + + private void removeWriter(int id) { + if (idToWriter.containsKey(id) == false) { + throw new IllegalArgumentException( + "Writer " + id + " has not been registered or has been removed already"); + } + idToWriter.remove(id); + } + + private void flushIfNecessary(int id) throws IOException { + if (idToWriter.containsKey(id) == false) { + throw new IllegalArgumentException( + "Writer " + id + " has not been registered or has been removed already"); 
+ } + long totalRam = 0L; + for (IndexWriter writer : idToWriter.values()) { + totalRam += writer.ramBytesUsed(); + } + if (totalRam >= config.getRAMBufferSizeMB() * 1024 * 1024) { + IndexWriter writerToFlush = chooseWriterToFlush(idToWriter.values(), idToWriter.get(id)); + writerToFlush.flushNextBuffer(); + } + } + + /** + * Chooses which writer should be flushed. Default implementation chooses the writer with most RAM + * usage Review Comment: FWIW we ran benchmarks in Elasticsearch, and the approach that worked the best was to flush IndexWriters in a round-robin fashion. This is not intuitive at first sight, but I believe that it works better in practice because it is more likely to flush DWPTs that are little used, and also because otherwise you favor IndexWriters that do little indexing over IndexWriters that do heavy indexing (and are thus more likely to have a large buffer). ########## lucene/core/src/java/org/apache/lucene/index/IndexWriter.java: ########## @@ -939,11 +941,14 @@ protected final void ensureOpen() throws AlreadyClosedException { * @param d the index directory. The index is either created or appended according <code> * conf.getOpenMode()</code>. * @param conf the configuration settings according to which IndexWriter should be initialized. + * @param indexWriterRAMManager The RAM manager used for multi-tenant RAM management * @throws IOException if the directory cannot be read/written to, or if it does not exist and * <code>conf.getOpenMode()</code> is <code>OpenMode.APPEND</code> or if there is any other * low-level IO error */ - public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException { + public IndexWriter( + Directory d, IndexWriterConfig conf, IndexWriterRAMManager indexWriterRAMManager) Review Comment: It should be on the `IndexWriterConfig` rather than another `IndexWriter` ctor argument. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at: us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org