jpountz commented on PR #13381: URL: https://github.com/apache/lucene/pull/13381#issuecomment-2119370881
<details>
<summary>I added "search" concurrency to the benchmark to make it a bit more realistic</summary>

```java
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.ThreadInterruptedException;

public class PrefetchBench {

  private static final int CONCURRENCY = 10;
  private static final int NUM_TERMS = 3;
  private static final long FILE_SIZE = 100L * 1024 * 1024 * 1024; // 100GB
  private static final int NUM_BYTES = 16;

  public static int DUMMY;

  public static void main(String[] args) throws Exception {
    Path filePath = Paths.get(args[0]);
    Path dirPath = filePath.getParent();
    String fileName = filePath.getFileName().toString();
    Random r = ThreadLocalRandom.current();

    try (Directory dir = new MMapDirectory(dirPath)) {
      // Create the file with random data if it doesn't exist yet.
      if (Arrays.asList(dir.listAll()).contains(fileName) == false) {
        try (IndexOutput out = dir.createOutput(fileName, IOContext.DEFAULT)) {
          byte[] buf = new byte[8192];
          for (long i = 0; i < FILE_SIZE; i += buf.length) {
            r.nextBytes(buf);
            out.writeBytes(buf, buf.length);
          }
        }
      }

      for (boolean dataFitsInCache : new boolean[] { false, true }) {
        try (IndexInput i0 = dir.openInput(fileName, IOContext.DEFAULT)) {
          final IndexInput input;
          if (dataFitsInCache) {
            // 16MB slice that should easily fit in the page cache
            input = i0.slice("slice", 0, 16 * 1024 * 1024);
          } else {
            input = i0;
          }
          final CountDownLatch latch = new CountDownLatch(1);
          RandomReader[] readers = new RandomReader[CONCURRENCY];
          for (int i = 0; i < readers.length; ++i) {
            IndexInput[] inputs = new IndexInput[NUM_TERMS];
            for (int j = 0; j < inputs.length; ++j) {
              inputs[j] = input.clone();
            }
            readers[i] = new RandomReader(inputs, latch);
            readers[i].start();
          }
          // Release all reader threads at once.
          latch.countDown();
          List<Long> prefetchLatencies = new ArrayList<>();
          List<Long> noPrefetchLatencies = new ArrayList<>();
          for (RandomReader reader : readers) {
            reader.join();
            prefetchLatencies.addAll(reader.latencies[0]);
            noPrefetchLatencies.addAll(reader.latencies[1]);
          }
          prefetchLatencies.sort(null);
          noPrefetchLatencies.sort(null);

          System.out.println("Data " + (dataFitsInCache ? "fits" : "does not fit") + " in the page cache");
          long prefetchP50 = prefetchLatencies.get(prefetchLatencies.size() / 2);
          long prefetchP90 = prefetchLatencies.get(prefetchLatencies.size() * 9 / 10);
          long prefetchP99 = prefetchLatencies.get(prefetchLatencies.size() * 99 / 100);
          long noPrefetchP50 = noPrefetchLatencies.get(noPrefetchLatencies.size() / 2);
          long noPrefetchP90 = noPrefetchLatencies.get(noPrefetchLatencies.size() * 9 / 10);
          long noPrefetchP99 = noPrefetchLatencies.get(noPrefetchLatencies.size() * 99 / 100);

          System.out.println(" With prefetching: P50=" + prefetchP50 + "ns P90=" + prefetchP90 + "ns P99=" + prefetchP99 + "ns");
          System.out.println(" Without prefetching: P50=" + noPrefetchP50 + "ns P90=" + noPrefetchP90 + "ns P99=" + noPrefetchP99 + "ns");
        }
      }
    }
  }

  private static class RandomReader extends Thread {

    private final IndexInput[] inputs;
    private final CountDownLatch latch;
    private final byte[][] b = new byte[NUM_TERMS][];
    // [0] = latencies with prefetching, [1] = latencies without (see iter & 1 below)
    final List<Long>[] latencies = new List[2];

    RandomReader(IndexInput[] inputs, CountDownLatch latch) {
      this.inputs = inputs;
      this.latch = latch;
      latencies[0] = new ArrayList<>();
      latencies[1] = new ArrayList<>();
      for (int i = 0; i < NUM_TERMS; ++i) {
        b[i] = new byte[NUM_BYTES];
      }
    }

    @Override
    public void run() {
      try {
        latch.await();
        final ThreadLocalRandom r = ThreadLocalRandom.current();
        final long length = inputs[0].length();
        for (int iter = 0; iter < 100_000; ++iter) {
          // Alternate: even iterations prefetch, odd ones don't.
          final boolean prefetch = (iter & 1) == 0;
          final long start = System.nanoTime();
          for (IndexInput ii : inputs) {
            final long offset = r.nextLong(length - NUM_BYTES);
            ii.seek(offset);
            if (prefetch) {
              ii.prefetch(offset, 1); // hint that bytes at offset are about to be read
            }
          }
          for (int i = 0; i < NUM_TERMS; ++i) {
            inputs[i].readBytes(b[i], 0, b[i].length);
          }
          final long end = System.nanoTime();
          // Prevent the JVM from optimizing away the reads
          DUMMY = Arrays.stream(b).mapToInt(Arrays::hashCode).sum();
          latencies[iter & 1].add(end - start);
        }
      } catch (IOException e) {
        throw new UncheckedIOException(e);
      } catch (InterruptedException e) {
        throw new ThreadInterruptedException(e);
      }
    }
  }
}
```
</details>

On the latest version of this PR, it reports:

```
Data does not fit in the page cache
 With prefetching: P50=104260ns P90=159710ns P99=228880ns
 Without prefetching: P50=242580ns P90=315821ns P99=405901ns
Data fits in the page cache
 With prefetching: P50=310ns P90=6700ns P99=12320ns
 Without prefetching: P50=290ns P90=6770ns P99=11610ns
```

vs. the following on `main`:

```
Data does not fit in the page cache
 With prefetching: P50=97620ns P90=153050ns P99=220510ns
 Without prefetching: P50=226690ns P90=302530ns P99=392770ns
Data fits in the page cache
 With prefetching: P50=6970ns P90=9380ns P99=12300ns
 Without prefetching: P50=290ns P90=5890ns P99=8560ns
```

In short: prefetching still cuts latencies by more than 2x when the data does not fit in the page cache, and the overhead that prefetching added on `main` when the data was already cached (P50=6970ns vs. 290ns) is gone with this PR (P50=310ns).