mikemccand commented on code in PR #12345: URL: https://github.com/apache/lucene/pull/12345#discussion_r1415450166
########## lucene/core/src/java/org/apache/lucene/index/ExitableIndexReader.java: ########## @@ -0,0 +1,436 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.lucene.util.Bits; + +/** + * The {@link ExitableIndexReader} is used to timeout I/O operation which is done during query + * rewrite. After this time is exceeded, the search thread is stopped by throwing a {@link + * ExitableIndexReader.TimeExceededException} + */ +public final class ExitableIndexReader extends IndexReader { + private final IndexReader indexReader; + private final QueryTimeout queryTimeout; + + /** + * Create a ExitableIndexReader wrapper over another {@link IndexReader} with a specified timeout. + * + * @param indexReader the wrapped {@link IndexReader} + * @param queryTimeout max time allowed for collecting hits after which {@link + * ExitableIndexReader.TimeExceededException} is thrown + */ + public ExitableIndexReader(IndexReader indexReader, QueryTimeout queryTimeout) { + this.indexReader = indexReader; + this.queryTimeout = queryTimeout; + doWrapIndexReader(indexReader, queryTimeout); + } + + /** Thrown when elapsed search time exceeds allowed search time. */ + @SuppressWarnings("serial") + static class TimeExceededException extends RuntimeException { + private TimeExceededException() { + super("TimeLimit Exceeded"); + } + + private TimeExceededException(Exception e) { + super(e); + } + } + + @Override + public TermVectors termVectors() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.termVectors(); + } + + @Override + public int numDocs() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.numDocs(); + } + + @Override + public int maxDoc() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.maxDoc(); + } + + @Override + public StoredFields storedFields() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.storedFields(); + } + + @Override + protected void doClose() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + indexReader.doClose(); + } + + @Override + public IndexReaderContext getContext() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getContext(); + } + + @Override + public CacheHelper getReaderCacheHelper() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getReaderCacheHelper(); + } + + @Override + public int docFreq(Term term) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.docFreq(term); + } + + @Override + public long totalTermFreq(Term term) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.totalTermFreq(term); + } + + @Override + public long getSumDocFreq(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getSumDocFreq(field); + } + + @Override + public int getDocCount(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getDocCount(field); + } + + @Override + public long getSumTotalTermFreq(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getSumTotalTermFreq(field); + } + + /** Method to wrap leaf readers of underlying index reader */ + protected static void doWrapIndexReader(IndexReader in, QueryTimeout queryTimeout) { + try { + Map<CacheKey, LeafReader> readerCache = new HashMap<>(); + List<LeafReaderContext> leaves = in.leaves(); + List<LeafReader> readers = new ArrayList<>(); + for (LeafReaderContext leafCtx : leaves) { + LeafReader reader = leafCtx.reader(); + readers.add(reader); + // we try to reuse the life docs instances here if the reader cache key didn't change + if (reader instanceof ExitableIndexReader.TimeoutLeafReader + && reader.getReaderCacheHelper() != null) { + readerCache.put((reader).getReaderCacheHelper().getKey(), reader); + } + } + ExitableSubReaderWrapper exitableSubReaderWrapper = + new ExitableSubReaderWrapper(readerCache, queryTimeout); + exitableSubReaderWrapper.wrap(readers); + } catch (TimeExceededException e) { + throw new TimeExceededException(e); + } + } + + private static class ExitableSubReaderWrapper extends FilterDirectoryReader.SubReaderWrapper { + private final Map<CacheKey, LeafReader> mapping; + + private final QueryTimeout queryTimeout; + + public ExitableSubReaderWrapper( + Map<CacheKey, LeafReader> oldReadersCache, QueryTimeout queryTimeout) { + assert oldReadersCache != null; + this.mapping = oldReadersCache; + this.queryTimeout = queryTimeout; + } + + @Override + protected LeafReader[] wrap(List<? extends LeafReader> readers) { + List<LeafReader> wrapped = new ArrayList<>(readers.size()); + for (LeafReader reader : readers) { + LeafReader wrap = wrap(reader); + assert wrap != null; + if (wrap.numDocs() != 0) { + wrapped.add(wrap); + } + } + return wrapped.toArray(new LeafReader[0]); + } + + @Override + public LeafReader wrap(LeafReader reader) { + CacheHelper readerCacheHelper = reader.getReaderCacheHelper(); + if (readerCacheHelper != null && mapping.containsKey(readerCacheHelper.getKey())) { + // if the reader cache helper didn't change and we have it in the cache don't bother + // creating a new one + return mapping.get(readerCacheHelper.getKey()); + } + return new TimeoutLeafReader(reader, queryTimeout); + } + } + + /** + * TimeoutLeafReader is wrapper class for FilterLeafReader which is imposing timeout on different + * operations of FilterLeafReader + */ + public static class TimeoutLeafReader extends FilterLeafReader { Review Comment: Does it need to be public? ########## lucene/core/src/java/org/apache/lucene/index/ExitableIndexReader.java: ########## @@ -0,0 +1,436 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.lucene.util.Bits; + +/** + * The {@link ExitableIndexReader} is used to timeout I/O operation which is done during query + * rewrite. After this time is exceeded, the search thread is stopped by throwing a {@link + * ExitableIndexReader.TimeExceededException} + */ +public final class ExitableIndexReader extends IndexReader { + private final IndexReader indexReader; + private final QueryTimeout queryTimeout; + + /** + * Create a ExitableIndexReader wrapper over another {@link IndexReader} with a specified timeout. + * + * @param indexReader the wrapped {@link IndexReader} + * @param queryTimeout max time allowed for collecting hits after which {@link + * ExitableIndexReader.TimeExceededException} is thrown + */ + public ExitableIndexReader(IndexReader indexReader, QueryTimeout queryTimeout) { + this.indexReader = indexReader; + this.queryTimeout = queryTimeout; + doWrapIndexReader(indexReader, queryTimeout); + } + + /** Thrown when elapsed search time exceeds allowed search time. */ + @SuppressWarnings("serial") + static class TimeExceededException extends RuntimeException { + private TimeExceededException() { + super("TimeLimit Exceeded"); + } + + private TimeExceededException(Exception e) { + super(e); + } + } + + @Override + public TermVectors termVectors() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.termVectors(); + } + + @Override + public int numDocs() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.numDocs(); + } + + @Override + public int maxDoc() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.maxDoc(); + } + + @Override + public StoredFields storedFields() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.storedFields(); + } + + @Override + protected void doClose() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + indexReader.doClose(); + } + + @Override + public IndexReaderContext getContext() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getContext(); + } + + @Override + public CacheHelper getReaderCacheHelper() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getReaderCacheHelper(); + } + + @Override + public int docFreq(Term term) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.docFreq(term); + } + + @Override + public long totalTermFreq(Term term) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.totalTermFreq(term); + } + + @Override + public long getSumDocFreq(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getSumDocFreq(field); + } + + @Override + public int getDocCount(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getDocCount(field); + } + + @Override + public long getSumTotalTermFreq(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getSumTotalTermFreq(field); + } + + /** Method to wrap leaf readers of underlying index reader */ + protected static void doWrapIndexReader(IndexReader in, QueryTimeout queryTimeout) { + try { + Map<CacheKey, LeafReader> readerCache = new HashMap<>(); + List<LeafReaderContext> leaves = in.leaves(); + List<LeafReader> readers = new ArrayList<>(); + for (LeafReaderContext leafCtx : leaves) { + LeafReader reader = leafCtx.reader(); + readers.add(reader); + // we try to reuse the life docs instances here if the reader cache key didn't change Review Comment: `life` -> `live` ########## lucene/facet/src/java/org/apache/lucene/facet/StringDocValuesReaderState.java: ########## @@ -46,9 +47,13 @@ public class StringDocValuesReaderState { * (e.g., to pickup NRT updates) requires constructing a new state instance. */ public StringDocValuesReaderState(IndexReader reader, String field) throws IOException { - this.reader = reader; + if (reader instanceof ExitableIndexReader) { Review Comment: I don't think we should do this here? Caller should not be using a `ExitableDirectoryReader` when constructing this state? ########## lucene/facet/src/test/org/apache/lucene/facet/TestRandomSamplingFacetsCollector.java: ########## @@ -147,7 +149,10 @@ public void testRandomSampling() throws Exception { Math.min(5 * sampled.value.floatValue(), numDocs / 10.f), 1.0); } - - IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir); + IndexReader r = searcher.getIndexReader(); Review Comment: We shouldn't do this either? In general all code using `IndexReader` should not need to specially check for `ExitableDirectoryReader`. Rather, callers should not pass `ExitableDirectoryReader` to these places. ########## lucene/core/src/java/org/apache/lucene/index/ExitableIndexReader.java: ########## @@ -0,0 +1,436 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.lucene.util.Bits; + +/** + * The {@link ExitableIndexReader} is used to timeout I/O operation which is done during query + * rewrite. After this time is exceeded, the search thread is stopped by throwing a {@link + * ExitableIndexReader.TimeExceededException} + */ +public final class ExitableIndexReader extends IndexReader { + private final IndexReader indexReader; + private final QueryTimeout queryTimeout; + + /** + * Create a ExitableIndexReader wrapper over another {@link IndexReader} with a specified timeout. + * + * @param indexReader the wrapped {@link IndexReader} + * @param queryTimeout max time allowed for collecting hits after which {@link + * ExitableIndexReader.TimeExceededException} is thrown + */ + public ExitableIndexReader(IndexReader indexReader, QueryTimeout queryTimeout) { + this.indexReader = indexReader; + this.queryTimeout = queryTimeout; + doWrapIndexReader(indexReader, queryTimeout); + } + + /** Thrown when elapsed search time exceeds allowed search time. */ + @SuppressWarnings("serial") + static class TimeExceededException extends RuntimeException { + private TimeExceededException() { + super("TimeLimit Exceeded"); + } + + private TimeExceededException(Exception e) { + super(e); + } + } + + @Override + public TermVectors termVectors() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.termVectors(); + } + + @Override + public int numDocs() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.numDocs(); + } + + @Override + public int maxDoc() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.maxDoc(); + } + + @Override + public StoredFields storedFields() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.storedFields(); + } + + @Override + protected void doClose() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + indexReader.doClose(); + } + + @Override + public IndexReaderContext getContext() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getContext(); + } + + @Override + public CacheHelper getReaderCacheHelper() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getReaderCacheHelper(); + } + + @Override + public int docFreq(Term term) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.docFreq(term); + } + + @Override + public long totalTermFreq(Term term) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.totalTermFreq(term); + } + + @Override + public long getSumDocFreq(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getSumDocFreq(field); + } + + @Override + public int getDocCount(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getDocCount(field); + } + + @Override + public long getSumTotalTermFreq(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getSumTotalTermFreq(field); + } + + /** Method to wrap leaf readers of underlying index reader */ + protected static void doWrapIndexReader(IndexReader in, QueryTimeout queryTimeout) { + try { + Map<CacheKey, LeafReader> readerCache = new HashMap<>(); + List<LeafReaderContext> leaves = in.leaves(); + List<LeafReader> readers = new ArrayList<>(); + for (LeafReaderContext leafCtx : leaves) { + LeafReader reader = leafCtx.reader(); + readers.add(reader); + // we try to reuse the life docs instances here if the reader cache key didn't change + if (reader instanceof ExitableIndexReader.TimeoutLeafReader + && reader.getReaderCacheHelper() != null) { + readerCache.put((reader).getReaderCacheHelper().getKey(), reader); + } + } + ExitableSubReaderWrapper exitableSubReaderWrapper = + new ExitableSubReaderWrapper(readerCache, queryTimeout); + exitableSubReaderWrapper.wrap(readers); + } catch (TimeExceededException e) { + throw new TimeExceededException(e); + } + } + + private static class ExitableSubReaderWrapper extends FilterDirectoryReader.SubReaderWrapper { + private final Map<CacheKey, LeafReader> mapping; + + private final QueryTimeout queryTimeout; + + public ExitableSubReaderWrapper( + Map<CacheKey, LeafReader> oldReadersCache, QueryTimeout queryTimeout) { + assert oldReadersCache != null; + this.mapping = oldReadersCache; + this.queryTimeout = queryTimeout; + } + + @Override + protected LeafReader[] wrap(List<? extends LeafReader> readers) { + List<LeafReader> wrapped = new ArrayList<>(readers.size()); + for (LeafReader reader : readers) { + LeafReader wrap = wrap(reader); + assert wrap != null; + if (wrap.numDocs() != 0) { + wrapped.add(wrap); + } + } + return wrapped.toArray(new LeafReader[0]); + } + + @Override + public LeafReader wrap(LeafReader reader) { + CacheHelper readerCacheHelper = reader.getReaderCacheHelper(); + if (readerCacheHelper != null && mapping.containsKey(readerCacheHelper.getKey())) { + // if the reader cache helper didn't change and we have it in the cache don't bother + // creating a new one + return mapping.get(readerCacheHelper.getKey()); + } + return new TimeoutLeafReader(reader, queryTimeout); + } + } + + /** + * TimeoutLeafReader is wrapper class for FilterLeafReader which is imposing timeout on different + * operations of FilterLeafReader + */ + public static class TimeoutLeafReader extends FilterLeafReader { + /** To be wrapped {@link LeafReader} */ + protected final LeafReader in; + + /** QueryTimeout parameter */ + private final QueryTimeout queryTimeout; + + @Override + public CacheHelper getReaderCacheHelper() { + return null; + } + + @Override + public CacheHelper getCoreCacheHelper() { + return null; + } + + /** + * Create a TimeoutLeafReader wrapper over another {@link FilterLeafReader} with a specified + * timeout. + * + * @param in the wrapped {@link LeafReader} + * @param queryTimeout max time allowed for collecting hits after which {@link + * ExitableIndexReader.TimeExceededException} is thrown + */ + protected TimeoutLeafReader(LeafReader in, QueryTimeout queryTimeout) { + super(in); + if (in == null) { Review Comment: You could instead / more compactly do: ``` super(Objects.requireNonNull(in)); ``` I think? ########## lucene/core/src/test/org/apache/lucene/index/TestExitableIndexReader.java: ########## @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +import java.io.IOException; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.ExitableIndexReader.TimeExceededException; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.util.LuceneTestCase; + +/** Tests the {@link TestExitableIndexReader}. */ +public class TestExitableIndexReader extends LuceneTestCase { + + public void testFuzzyQueryRewriteTimeout() throws IOException { + Directory directory = newDirectory(); + IndexWriter writer = + new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random()))); + int n = 10000; + for (int i = 0; i < n; i++) { + Document d = new Document(); + d.add(newTextField("abc", "ones ", Field.Store.YES)); + writer.addDocument(d); + } + writer.forceMerge(1); + writer.commit(); + writer.close(); + + FuzzyQuery query = new FuzzyQuery(new Term("field", "abc"), FuzzyQuery.defaultMaxEdits, 1); + + DirectoryReader directoryReader = DirectoryReader.open(directory); + IndexSearcher searcher = new IndexSearcher(directoryReader); + searcher.setTimeout(countingQueryTimeout(1)); + expectThrows(TimeExceededException.class, () -> searcher.search(query, n)); + + directoryReader.close(); + directory.close(); + } + + private static QueryTimeout countingQueryTimeout(int timeallowed) { + + return new QueryTimeout() { + static int counter = 0; Review Comment: You don't need `= 0` -- it's already Java's default. ########## lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java: ########## @@ -372,6 +373,9 @@ public static LeafSlice[] slices( /** Return the {@link IndexReader} this searches. */ public IndexReader getIndexReader() { + if (queryTimeout != null) { Review Comment: Hmm this seems dangerous -- getters shouldn't be doing magical / surprising things I think? Could we instead require/expect caller to creating the `IndexSearcher` with the `ExitableDirectoryReader`? Edit: or perhaps when `IndexSearcher` calls rewrite, specifically, if there is a `queryTimeout`, it could wrap the reader at that point, only? ########## lucene/core/src/test/org/apache/lucene/index/TestExitableIndexReader.java: ########## @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +import java.io.IOException; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.ExitableIndexReader.TimeExceededException; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.util.LuceneTestCase; + +/** Tests the {@link TestExitableIndexReader}. */ +public class TestExitableIndexReader extends LuceneTestCase { + + public void testFuzzyQueryRewriteTimeout() throws IOException { + Directory directory = newDirectory(); + IndexWriter writer = + new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random()))); + int n = 10000; + for (int i = 0; i < n; i++) { + Document d = new Document(); + d.add(newTextField("abc", "ones ", Field.Store.YES)); + writer.addDocument(d); + } + writer.forceMerge(1); + writer.commit(); + writer.close(); + + FuzzyQuery query = new FuzzyQuery(new Term("field", "abc"), FuzzyQuery.defaultMaxEdits, 1); + + DirectoryReader directoryReader = DirectoryReader.open(directory); + IndexSearcher searcher = new IndexSearcher(directoryReader); + searcher.setTimeout(countingQueryTimeout(1)); + expectThrows(TimeExceededException.class, () -> searcher.search(query, n)); + + directoryReader.close(); + directory.close(); + } + + private static QueryTimeout countingQueryTimeout(int timeallowed) { Review Comment: Rename to `timeAllowed`? ########## lucene/core/src/java/org/apache/lucene/index/ExitableIndexReader.java: ########## @@ -0,0 +1,436 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.lucene.util.Bits; + +/** + * The {@link ExitableIndexReader} is used to timeout I/O operation which is done during query + * rewrite. After this time is exceeded, the search thread is stopped by throwing a {@link + * ExitableIndexReader.TimeExceededException} + */ +public final class ExitableIndexReader extends IndexReader { + private final IndexReader indexReader; + private final QueryTimeout queryTimeout; + + /** + * Create a ExitableIndexReader wrapper over another {@link IndexReader} with a specified timeout. + * + * @param indexReader the wrapped {@link IndexReader} + * @param queryTimeout max time allowed for collecting hits after which {@link + * ExitableIndexReader.TimeExceededException} is thrown + */ + public ExitableIndexReader(IndexReader indexReader, QueryTimeout queryTimeout) { + this.indexReader = indexReader; + this.queryTimeout = queryTimeout; + doWrapIndexReader(indexReader, queryTimeout); + } + + /** Thrown when elapsed search time exceeds allowed search time. */ + @SuppressWarnings("serial") + static class TimeExceededException extends RuntimeException { + private TimeExceededException() { + super("TimeLimit Exceeded"); + } + + private TimeExceededException(Exception e) { + super(e); + } + } + + @Override + public TermVectors termVectors() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.termVectors(); + } + + @Override + public int numDocs() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.numDocs(); + } + + @Override + public int maxDoc() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.maxDoc(); + } + + @Override + public StoredFields storedFields() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.storedFields(); + } + + @Override + protected void doClose() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + indexReader.doClose(); + } + + @Override + public IndexReaderContext getContext() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getContext(); + } + + @Override + public CacheHelper getReaderCacheHelper() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getReaderCacheHelper(); + } + + @Override + public int docFreq(Term term) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.docFreq(term); + } + + @Override + public long totalTermFreq(Term term) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.totalTermFreq(term); + } + + @Override + public long getSumDocFreq(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getSumDocFreq(field); + } + + @Override + public int getDocCount(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getDocCount(field); + } + + @Override + public long getSumTotalTermFreq(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return indexReader.getSumTotalTermFreq(field); + } + + /** Method to wrap leaf readers of underlying index reader */ + protected static void doWrapIndexReader(IndexReader in, QueryTimeout queryTimeout) { + try { + Map<CacheKey, LeafReader> readerCache = new HashMap<>(); + List<LeafReaderContext> leaves = in.leaves(); + List<LeafReader> readers = new ArrayList<>(); + for (LeafReaderContext leafCtx : leaves) { + LeafReader reader = leafCtx.reader(); + readers.add(reader); + // we try to reuse the life docs instances here if the reader cache key didn't change + if (reader instanceof ExitableIndexReader.TimeoutLeafReader + && reader.getReaderCacheHelper() != null) { + readerCache.put((reader).getReaderCacheHelper().getKey(), reader); + } + } + ExitableSubReaderWrapper exitableSubReaderWrapper = + new ExitableSubReaderWrapper(readerCache, queryTimeout); + exitableSubReaderWrapper.wrap(readers); + } catch (TimeExceededException e) { + throw new TimeExceededException(e); + } + } + + private static class ExitableSubReaderWrapper extends FilterDirectoryReader.SubReaderWrapper { + private final Map<CacheKey, LeafReader> mapping; + + private final QueryTimeout queryTimeout; + + public ExitableSubReaderWrapper( + Map<CacheKey, LeafReader> oldReadersCache, QueryTimeout queryTimeout) { + assert oldReadersCache != null; + this.mapping = oldReadersCache; + this.queryTimeout = queryTimeout; + } + + @Override + protected LeafReader[] wrap(List<? extends LeafReader> readers) { + List<LeafReader> wrapped = new ArrayList<>(readers.size()); + for (LeafReader reader : readers) { + LeafReader wrap = wrap(reader); + assert wrap != null; + if (wrap.numDocs() != 0) { + wrapped.add(wrap); + } + } + return wrapped.toArray(new LeafReader[0]); + } + + @Override + public LeafReader wrap(LeafReader reader) { + CacheHelper readerCacheHelper = reader.getReaderCacheHelper(); + if (readerCacheHelper != null && mapping.containsKey(readerCacheHelper.getKey())) { + // if the reader cache helper didn't change and we have it in the cache don't bother + // creating a new one + return mapping.get(readerCacheHelper.getKey()); + } + return new TimeoutLeafReader(reader, queryTimeout); + } + } + + /** + * TimeoutLeafReader is wrapper class for FilterLeafReader which is imposing timeout on different + * operations of FilterLeafReader + */ + public static class TimeoutLeafReader extends FilterLeafReader { + /** To be wrapped {@link LeafReader} */ + protected final LeafReader in; + + /** QueryTimeout parameter */ + private final QueryTimeout queryTimeout; + + @Override + public CacheHelper getReaderCacheHelper() { + return null; + } + + @Override + public CacheHelper getCoreCacheHelper() { + return null; + } + + /** + * Create a TimeoutLeafReader wrapper over another {@link FilterLeafReader} with a specified + * timeout. + * + * @param in the wrapped {@link LeafReader} + * @param queryTimeout max time allowed for collecting hits after which {@link + * ExitableIndexReader.TimeExceededException} is thrown + */ + protected TimeoutLeafReader(LeafReader in, QueryTimeout queryTimeout) { + super(in); + if (in == null) { + throw new NullPointerException("incoming LeafReader must not be null"); + } + this.in = in; + this.queryTimeout = queryTimeout; + in.registerParentReader(this); + } + + @Override + public Bits getLiveDocs() { + ensureOpen(); + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return in.getLiveDocs(); + } + + @Override + public FieldInfos getFieldInfos() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return in.getFieldInfos(); + } + + @Override + public PointValues getPointValues(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return in.getPointValues(field); + } + + @Override + public FloatVectorValues getFloatVectorValues(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return in.getFloatVectorValues(field); + } + + @Override + public ByteVectorValues getByteVectorValues(String field) throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return in.getByteVectorValues(field); + } + + @Override + public TermVectors termVectors() throws IOException { + ensureOpen(); + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return in.termVectors(); + } + + @Override + public int numDocs() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + // Don't call ensureOpen() here (it could affect performance) + return in.numDocs(); + } + + @Override + public int maxDoc() { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + // Don't call ensureOpen() here (it could affect performance) + return in.maxDoc(); + } + + @Override + public StoredFields storedFields() throws IOException { + ensureOpen(); + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return in.storedFields(); + } + + @Override + protected void doClose() throws IOException { + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + in.close(); + } + + @Override + public Terms terms(String field) throws IOException { + ensureOpen(); + if (queryTimeout.shouldExit()) { + throw new ExitableIndexReader.TimeExceededException(); + } + return in.terms(field); Review Comment: Hmm I think you need to further wrap here? Likely the timeout will not have been hit when the `rewrite` first starts. That `rewrite` first calls `.terms()` for each segment, and then does the hard work of getting the `.iterator()` from it and stepping / seeking through the resulting `TermsEnum`. So I think you would need to return a `ExitableTerms`, which in turn returns an `ExitableTermsEnum`, etc.? And perhaps same thing for points, doc values, though we could defer those for a followon change? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org