uschindler commented on code in PR #13555: URL: https://github.com/apache/lucene/pull/13555#discussion_r1672768505
########## lucene/core/src/java21/org/apache/lucene/store/GroupedArena.java: ########## @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.store; + +import java.lang.foreign.AddressLayout; +import java.lang.foreign.Arena; +import java.lang.foreign.MemoryLayout; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.file.Path; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.index.IndexFileNames; + +@SuppressWarnings("preview") +final class GroupedArena implements Arena { + + private final String scopeId; + + private final ConcurrentHashMap<String, GroupedArena> arenas; + + private final Arena backing; + + private final AtomicInteger refCt; + + static Arena get(Path p, ConcurrentHashMap<String, GroupedArena> arenas) { + String filename = p.getFileName().toString(); + String segmentName = IndexFileNames.parseSegmentName(filename); + if (filename.length() == segmentName.length()) { + // no segment found; return a 1-off Arena + return Arena.ofShared(); + } + String scopeId = p.getParent().resolve(segmentName).toString(); + Arena ret; + do { + boolean[] computed = 
new boolean[1]; + final GroupedArena template = + arenas.computeIfAbsent( + scopeId, + (s) -> { + computed[0] = true; + return new GroupedArena(s, arenas); + }); + if (computed[0]) { + return template; + } + ret = template.cloneIfActive(); + } while (ret == null); // TODO: will this ever actually loop? + return ret; + } + + GroupedArena(String scopeId, ConcurrentHashMap<String, GroupedArena> arenas) { + this.scopeId = scopeId; + this.arenas = arenas; + this.backing = Arena.ofShared(); + this.refCt = new AtomicInteger(1); + } + + private GroupedArena(GroupedArena template) { + this.scopeId = template.scopeId; + this.arenas = template.arenas; + this.backing = template.backing; + this.refCt = template.refCt; + } + + private GroupedArena cloneIfActive() { + if (refCt.getAndIncrement() > 0) { + // the usual (always?) case + return new GroupedArena(this); + } else { + // TODO: this should never happen? + return null; + } + } + + @Override + public void close() { + int ct = refCt.decrementAndGet(); + if (ct == 0) { + arenas.remove(scopeId); + if (refCt.get() == 0) { + // TODO: this should always be the case? But if it's not, it should be a benign + // race condition. Whatever caller incremented `refCt` will close it, and if + // anyone tries to open a new arena with the same `scopeId` that we removed + // above, they'll simply create a new Arena, and we're no worse off than we + // would have been if every Arena was created as a one-off. + backing.close(); + } + } else { + assert ct > 0 : "refCt should never be negative; found " + ct; + } + } + + @Override Review Comment: I am not so happy that we need to implement all those methods. Let's keep the default ones. Maybe let the required ones throw UOE, because we never use the arena to allocate memory. ########## lucene/core/src/java21/org/apache/lucene/store/GroupedArena.java: ########## @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.store; + +import java.lang.foreign.AddressLayout; +import java.lang.foreign.Arena; +import java.lang.foreign.MemoryLayout; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.file.Path; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.index.IndexFileNames; + +@SuppressWarnings("preview") +final class GroupedArena implements Arena { + + private final String scopeId; + + private final ConcurrentHashMap<String, GroupedArena> arenas; + + private final Arena backing; + + private final AtomicInteger refCt; + + static Arena get(Path p, ConcurrentHashMap<String, GroupedArena> arenas) { + String filename = p.getFileName().toString(); + String segmentName = IndexFileNames.parseSegmentName(filename); + if (filename.length() == segmentName.length()) { + // no segment found; return a 1-off Arena + return Arena.ofShared(); + } + String scopeId = p.getParent().resolve(segmentName).toString(); + Arena ret; + do { Review Comment: this code is a bit strange, also with the boolean[] as modifiable holder. I'd rewrite this. Basically the code should better check the refcount of the GroupedArena and if it is 0 return itself, incrementing by 1. 
For all other cases it should just return a clone with incremented refcount. Ret should never be null, please remove the code that could make it null in the cloneIfActive(). While loop shouldn't be needed. ########## lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java: ########## @@ -24,12 +24,15 @@ import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.Optional; +import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.util.Constants; import org.apache.lucene.util.Unwrappable; @SuppressWarnings("preview") final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexInputProvider { + private static final ConcurrentHashMap<String, GroupedArena> ARENAS = new ConcurrentHashMap<>(); Review Comment: I don't like that it is a static global arena. It should be private per MMapDirectory instance. This should be doable, as we can pass the map to `GroupedArena.get(path, arena)`. So it can also be a private per-instance one. If you do this, the file name is the only key needed. The current code with adding full path is only needed because it is global. The ConcurrentHashMap expected should be typed `ConcurrentHashMap<String, ?>` as `Arena` should never escape the private Java 21 code. I am afraid of memory leaks caused by this. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org