jpountz commented on PR #13359: URL: https://github.com/apache/lucene/pull/13359#issuecomment-2132902792
<details> <summary> Now that #13408 has been merged, I could update the benchmark to simply call IndexSearcher#search. </summary>

```java
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Latency benchmark for small disjunctive term queries against a large, single-segment index.
 *
 * <p>If the directory given as the first argument holds no index yet, one is built from
 * 1,000 tasks x 1,000 documents, each document carrying 10,000 random numeric terms in field
 * {@code "f"}, and is then force-merged down to one segment. Afterwards 10,000 queries of
 * {@link #NUM_TERMS} random {@code SHOULD} term clauses are run in index order and the P50, P90
 * and P99 latencies are printed in microseconds.
 */
public class TermsEnumPrefetchBench {

  /** Number of SHOULD term clauses per benchmark query. */
  private static final int NUM_TERMS = 3;

  /** Exclusive upper bound of the random values used as terms, at index and at query time. */
  private static final long TERM_SPACE = 20_000_000_000L;

  private static final int INDEXING_TASKS = 1000;
  private static final int DOCS_PER_TASK = 1_000;
  private static final int TERMS_PER_DOC = 10_000;
  private static final int PROGRESS_INTERVAL = 10_000;
  private static final int NUM_QUERIES = 10_000;

  /**
   * Running sum of the hit counts returned by the benchmark queries. It is never read by this
   * class; presumably it exists so the search result is consumed and cannot be optimized away.
   */
  public static int DUMMY;

  /**
   * Entry point.
   *
   * @param args {@code args[0]} is the filesystem path of the index directory; further arguments
   *     are ignored
   * @throws IllegalArgumentException if no index directory is given
   * @throws Exception if indexing or searching fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 1) {
      throw new IllegalArgumentException("Usage: TermsEnumPrefetchBench <index-directory>");
    }
    Path dirPath = Paths.get(args[0]);

    List<Long> latencies;
    // Directory is Closeable; close it once both indexing and searching are done.
    try (Directory dir = FSDirectory.open(dirPath)) {
      if (DirectoryReader.indexExists(dir) == false) {
        buildIndex(dir);
      }
      latencies = measureLatencies(dir);
    }

    // A null comparator sorts by natural ordering.
    latencies.sort(null);
    System.out.println("P50: " + percentile(latencies, 50));
    System.out.println("P90: " + percentile(latencies, 90));
    System.out.println("P99: " + percentile(latencies, 99));
  }

  /** Creates the benchmark index in {@code dir} and force-merges it to a single segment. */
  private static void buildIndex(Directory dir) throws Exception {
    TieredMergePolicy mp = new TieredMergePolicy();
    mp.setSegmentsPerTier(100);
    mp.setMaxMergeAtOnce(100);
    mp.setMaxMergedSegmentMB(1024);

    // Commits are issued explicitly below. Disabling commit-on-close means that a failed indexing
    // run is rolled back when the writer closes instead of leaving a partial index behind that a
    // later run would mistake for a complete one.
    IndexWriterConfig config =
        new IndexWriterConfig()
            .setMergePolicy(mp)
            .setRAMBufferSizeMB(1024)
            .setCommitOnClose(false);

    try (IndexWriter w = new IndexWriter(dir, config)) {
      indexRandomDocs(w);
      w.commit();
      System.out.println("Start force merging");
      w.forceMerge(1);
      System.out.println("Done force merging");
      w.commit();
    }
  }

  /**
   * Runs all indexing tasks on a pool with one thread per available processor and waits for them.
   * Any task failure is rethrown to the caller rather than being lost on the worker thread.
   */
  private static void indexRandomDocs(IndexWriter w) throws Exception {
    ExecutorService executor =
        Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
    List<Future<?>> pending = new ArrayList<>(INDEXING_TASKS);
    AtomicLong indexed = new AtomicLong(0);
    try {
      for (int task = 0; task < INDEXING_TASKS; ++task) {
        pending.add(executor.submit(() -> indexBatch(w, indexed)));
      }
      executor.shutdown();
      if (executor.awaitTermination(1, TimeUnit.DAYS) == false) {
        throw new IllegalStateException("Indexing did not finish within one day");
      }
      for (Future<?> result : pending) {
        result.get(); // throws ExecutionException if the task failed
      }
    } finally {
      // No-op after a clean shutdown; stops the workers if we are leaving early.
      executor.shutdownNow();
    }
  }

  /** Adds {@link #DOCS_PER_TASK} random documents to {@code w}, reporting progress on stdout. */
  private static void indexBatch(IndexWriter w, AtomicLong indexed) {
    Random r = ThreadLocalRandom.current();
    for (int i = 0; i < DOCS_PER_TASK; ++i) {
      Document doc = new Document();
      for (int j = 0; j < TERMS_PER_DOC; ++j) {
        doc.add(new StringField("f", Long.toString(r.nextLong(TERM_SPACE)), Store.NO));
      }
      try {
        w.addDocument(doc);
      } catch (IOException e) {
        throw new UncheckedIOException(e);
      }
      final long actualIndexed = indexed.incrementAndGet();
      if (actualIndexed % PROGRESS_INTERVAL == 0) {
        System.out.println("Indexed: " + actualIndexed);
      }
    }
  }

  /**
   * Runs {@link #NUM_QUERIES} random queries and returns their latencies in microseconds, in
   * execution order (unsorted).
   */
  private static List<Long> measureLatencies(Directory dir) throws IOException {
    List<Long> latencies = new ArrayList<>(NUM_QUERIES);
    try (IndexReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      Random r = ThreadLocalRandom.current();
      for (int i = 0; i < NUM_QUERIES; ++i) {
        // The timed region includes building the query, not only executing it.
        long start = System.nanoTime();
        BooleanQuery.Builder query = new BooleanQuery.Builder();
        for (int t = 0; t < NUM_TERMS; ++t) {
          query.add(
              new TermQuery(new Term("f", Long.toString(r.nextLong(TERM_SPACE)))), Occur.SHOULD);
        }
        DUMMY += searcher.search(query.build(), 1, Sort.INDEXORDER).totalHits.value;
        long end = System.nanoTime();
        latencies.add((end - start) / 1000); // nanoseconds -> microseconds
      }
    }
    return latencies;
  }

  /** Returns the element at index {@code size * pct / 100} of an ascending-sorted list. */
  private static long percentile(List<Long> sorted, int pct) {
    return sorted.get(sorted.size() * pct / 100);
  }
}
```

</details>

Results still look good. Before the change: P50: 282 P90: 387 P99: 537 After the change: P50: 161 P90: 253 P99: 379 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org