jpountz commented on PR #13381: URL: https://github.com/apache/lucene/pull/13381#issuecomment-2119370881
<details>
<summary>I added "search" concurrency to the benchmark to make it a bit more realistic</summary>

```java
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.ThreadInterruptedException;

public class PrefetchBench {

  private static final int CONCURRENCY = 10;
  private static final int NUM_TERMS = 3;
  private static final long FILE_SIZE = 100L * 1024 * 1024 * 1024; // 100GB
  private static final int NUM_BYTES = 16;

  public static int DUMMY;

  public static void main(String[] args) throws Exception {
    Path filePath = Paths.get(args[0]);
    Path dirPath = filePath.getParent();
    String fileName = filePath.getFileName().toString();
    Random r = ThreadLocalRandom.current();

    try (Directory dir = new MMapDirectory(dirPath)) {
      // Create the file with random data if it doesn't exist yet.
      if (Arrays.asList(dir.listAll()).contains(fileName) == false) {
        try (IndexOutput out = dir.createOutput(fileName, IOContext.DEFAULT)) {
          byte[] buf = new byte[8192];
          for (long i = 0; i < FILE_SIZE; i += buf.length) {
            r.nextBytes(buf);
            out.writeBytes(buf, buf.length);
          }
        }
      }

      for (boolean dataFitsInCache : new boolean[] { false, true }) {
        try (IndexInput i0 = dir.openInput(fileName, IOContext.DEFAULT)) {
          final IndexInput input;
          if (dataFitsInCache) {
            // 16MB slice that should easily fit in the page cache
            input = i0.slice("slice", 0, 16 * 1024 * 1024);
          } else {
            input = i0;
          }
          final CountDownLatch latch = new CountDownLatch(1);
          RandomReader[] readers = new RandomReader[CONCURRENCY];
          for (int i = 0; i < readers.length; ++i) {
            IndexInput[] inputs = new IndexInput[NUM_TERMS];
            for (int j = 0; j < inputs.length; ++j) {
              inputs[j] = input.clone();
            }
            readers[i] = new RandomReader(inputs, latch);
            readers[i].start();
          }
          // Release all reader threads at once.
          latch.countDown();
          List<Long> prefetchLatencies = new ArrayList<>();
          List<Long> noPrefetchLatencies = new ArrayList<>();
          for (RandomReader reader : readers) {
            reader.join();
            prefetchLatencies.addAll(reader.latencies[0]);
            noPrefetchLatencies.addAll(reader.latencies[1]);
          }
          prefetchLatencies.sort(null);
          noPrefetchLatencies.sort(null);

          System.out.println("Data " + (dataFitsInCache ? "fits" : "does not fit") + " in the page cache");
          long prefetchP50 = prefetchLatencies.get(prefetchLatencies.size() / 2);
          long prefetchP90 = prefetchLatencies.get(prefetchLatencies.size() * 9 / 10);
          long prefetchP99 = prefetchLatencies.get(prefetchLatencies.size() * 99 / 100);
          long noPrefetchP50 = noPrefetchLatencies.get(noPrefetchLatencies.size() / 2);
          long noPrefetchP90 = noPrefetchLatencies.get(noPrefetchLatencies.size() * 9 / 10);
          long noPrefetchP99 = noPrefetchLatencies.get(noPrefetchLatencies.size() * 99 / 100);

          System.out.println(" With prefetching: P50=" + prefetchP50 + "ns P90=" + prefetchP90 + "ns P99=" + prefetchP99 + "ns");
          System.out.println(" Without prefetching: P50=" + noPrefetchP50 + "ns P90=" + noPrefetchP90 + "ns P99=" + noPrefetchP99 + "ns");
        }
      }
    }
  }

  private static class RandomReader extends Thread {

    private final IndexInput[] inputs;
    private final CountDownLatch latch;
    private final byte[][] b = new byte[NUM_TERMS][];
    // [0] = latencies with prefetching, [1] = latencies without (see iter & 1 below)
    final List<Long>[] latencies = new List[2];

    RandomReader(IndexInput[] inputs, CountDownLatch latch) {
      this.inputs = inputs;
      this.latch = latch;
      latencies[0] = new ArrayList<>();
      latencies[1] = new ArrayList<>();
      for (int i = 0; i < NUM_TERMS; ++i) {
        b[i] = new byte[NUM_BYTES];
      }
    }

    @Override
    public void run() {
      try {
        latch.await();
        final ThreadLocalRandom r = ThreadLocalRandom.current();
        final long length = inputs[0].length();
        for (int iter = 0; iter < 100_000; ++iter) {
          // Alternate: even iterations prefetch, odd ones don't.
          final boolean prefetch = (iter & 1) == 0;
          final long start = System.nanoTime();
          for (IndexInput ii : inputs) {
            final long offset = r.nextLong(length - NUM_BYTES);
            ii.seek(offset);
            if (prefetch) {
              ii.prefetch(offset, 1); // hint that bytes at offset are about to be read
            }
          }
          for (int i = 0; i < NUM_TERMS; ++i) {
            inputs[i].readBytes(b[i], 0, b[i].length);
          }
          final long end = System.nanoTime();
          // Prevent the JVM from optimizing away the reads
          DUMMY = Arrays.stream(b).mapToInt(Arrays::hashCode).sum();
          latencies[iter & 1].add(end - start);
        }
      } catch (IOException e) {
        throw new UncheckedIOException(e);
      } catch (InterruptedException e) {
        throw new ThreadInterruptedException(e);
      }
    }
  }
}
```
</details>

On the latest version of this PR, it reports:

```
Data does not fit in the page cache
 With prefetching: P50=104260ns P90=159710ns P99=228880ns
 Without prefetching: P50=242580ns P90=315821ns P99=405901ns
Data fits in the page cache
 With prefetching: P50=310ns P90=6700ns P99=12320ns
 Without prefetching: P50=290ns P90=6770ns P99=11610ns
```

vs. the following on `main`:

```
Data does not fit in the page cache
 With prefetching: P50=97620ns P90=153050ns P99=220510ns
 Without prefetching: P50=226690ns P90=302530ns P99=392770ns
Data fits in the page cache
 With prefetching: P50=6970ns P90=9380ns P99=12300ns
 Without prefetching: P50=290ns P90=5890ns P99=8560ns
```

In short: prefetching still cuts latencies by more than 2x when the data does not fit in the page cache, and the overhead that prefetching added on `main` when the data was already cached (P50=6970ns vs. 290ns) is gone with this PR (P50=310ns).