gf2121 commented on issue #12659:
URL: https://github.com/apache/lucene/issues/12659#issuecomment-1759244758

   I tried another way of reading, like this:
   ```
   byte b = msbBytes[pos++];
   long i = b & 0x7FL;
   while (b < 0) {
     b = msbBytes[pos++];
     i = (i << 7) | (b & 0x7FL);
   }
   ```
   
   The following are the benchmark results for this approach (`readVLongMSBNew`):
   
   * Comparing `readVLongMSB` with `readVLong`, some `valueBit`s are better but others are worse.
   * Comparing `readVLongMSBNew` with `readVLong`, `readVLongMSBNew` wins for all `valueBit`s.
   
   I'm considering trying the new way of reading to see what happens in the nightly benchmark.
   ```
   Benchmark                           (size)  (valueBit)   Mode  Cnt   Score   Error   Units
   ReadVLongBenchmark.readVLong           128           7  thrpt    5  29.333 ± 0.626  ops/us
   ReadVLongBenchmark.readVLong           128          14  thrpt    5   3.643 ± 0.026  ops/us
   ReadVLongBenchmark.readVLong           128          21  thrpt    5   2.411 ± 0.047  ops/us
   ReadVLongBenchmark.readVLong           128          28  thrpt    5   2.075 ± 0.004  ops/us
   ReadVLongBenchmark.readVLong           128          35  thrpt    5   1.836 ± 0.002  ops/us
   ReadVLongBenchmark.readVLong           128          42  thrpt    5   1.943 ± 0.006  ops/us
   ReadVLongBenchmark.readVLong           128          49  thrpt    5   1.514 ± 0.006  ops/us
   ReadVLongBenchmark.readVLong           128          56  thrpt    5   1.463 ± 0.002  ops/us
   ReadVLongBenchmark.readVLong           128          63  thrpt    5   1.338 ± 0.002  ops/us
   ReadVLongBenchmark.readVLongMSB        128           7  thrpt    5  29.350 ± 0.140  ops/us
   ReadVLongBenchmark.readVLongMSB        128          14  thrpt    5   4.474 ± 0.025  ops/us
   ReadVLongBenchmark.readVLongMSB        128          21  thrpt    5   3.640 ± 0.033  ops/us
   ReadVLongBenchmark.readVLongMSB        128          28  thrpt    5   2.439 ± 0.324  ops/us
   ReadVLongBenchmark.readVLongMSB        128          35  thrpt    5   1.879 ± 0.009  ops/us
   ReadVLongBenchmark.readVLongMSB        128          42  thrpt    5   1.508 ± 0.037  ops/us
   ReadVLongBenchmark.readVLongMSB        128          49  thrpt    5   1.268 ± 0.012  ops/us
   ReadVLongBenchmark.readVLongMSB        128          56  thrpt    5   1.091 ± 0.024  ops/us
   ReadVLongBenchmark.readVLongMSB        128          63  thrpt    5   0.960 ± 0.005  ops/us
   ReadVLongBenchmark.readVLongMSBNew     128           7  thrpt    5  31.110 ± 0.254  ops/us
   ReadVLongBenchmark.readVLongMSBNew     128          14  thrpt    5   4.075 ± 0.010  ops/us
   ReadVLongBenchmark.readVLongMSBNew     128          21  thrpt    5   2.600 ± 0.008  ops/us
   ReadVLongBenchmark.readVLongMSBNew     128          28  thrpt    5   2.244 ± 0.006  ops/us
   ReadVLongBenchmark.readVLongMSBNew     128          35  thrpt    5   1.946 ± 0.005  ops/us
   ReadVLongBenchmark.readVLongMSBNew     128          42  thrpt    5   2.102 ± 0.016  ops/us
   ReadVLongBenchmark.readVLongMSBNew     128          49  thrpt    5   1.615 ± 0.209  ops/us
   ReadVLongBenchmark.readVLongMSBNew     128          56  thrpt    5   1.614 ± 0.032  ops/us
   ReadVLongBenchmark.readVLongMSBNew     128          63  thrpt    5   1.465 ± 0.008  ops/us
   ```
   
   &nbsp;
   <details>
   <summary>CODE</summary>
   
   ```
   package testing;
   
   import org.apache.lucene.store.ByteArrayDataOutput;
   import org.apache.lucene.store.DataOutput;
   import org.apache.lucene.util.packed.PackedInts;
   import org.openjdk.jmh.annotations.*;
   
   import java.io.IOException;
   import java.util.concurrent.ThreadLocalRandom;
   import java.util.concurrent.TimeUnit;
   
   @BenchmarkMode(Mode.Throughput)
   @OutputTimeUnit(TimeUnit.MICROSECONDS)
   @State(Scope.Benchmark)
   @Warmup(iterations = 3, time = 3)
   @Measurement(iterations = 5, time = 3)
   @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
   public class ReadVLongBenchmark {
   
     private byte[] bytes;
     private byte[] msbBytes;
   
     @Param({"128"})
     int size;
   
     @Param({"7", "14", "21", "28", "35", "42", "49", "56", "63"})
     int valueBit;
   
     @Setup(Level.Trial)
     public void init() throws Exception {
       bytes = new byte[size * (valueBit / 7)];
       msbBytes = new byte[size * (valueBit / 7)];
       ByteArrayDataOutput out = new ByteArrayDataOutput(bytes);
       ByteArrayDataOutput msbOut = new ByteArrayDataOutput(msbBytes);
       final long min = 1L << (valueBit - 1);
       final long max = valueBit == 63 ? Long.MAX_VALUE : (1L << valueBit) - 1;
       for (int i = 0; i < size; i++) {
         long l = ThreadLocalRandom.current().nextLong(min, max);
         if (PackedInts.bitsRequired(l) != valueBit) {
           throw new IllegalStateException();
         }
         out.writeVLong(l);
         writeMSBVLong(l, msbOut);
       }
       if (readVLong() != readVLongMSB()) {
         System.out.println("vlong msb wrong");
         throw new IllegalStateException();
       }
       if (readVLong() != readVLongMSBNew()) {
         System.out.println("vlong new msb wrong");
         throw new IllegalStateException();
       }
     }
   
     @Benchmark
     public long readVLong() {
       int pos = 0;
       long res = 0;
       for (int iter = 0; iter < size; iter++) {
         byte b = bytes[pos++];
         long i = b & 0x7F;
         for (int shift = 7; (b & 0x80) != 0; shift += 7) {
           b = bytes[pos++];
           i |= (b & 0x7FL) << shift;
         }
         res ^= i;
       }
       return res;
     }
   
     @Benchmark
     public long readVLongMSB() {
       int pos = 0;
       long res = 0;
       for (int iter = 0; iter < size; iter++) {
         long i = 0L;
         while (true) {
           byte b = msbBytes[pos++];
           i = (i << 7) | (b & 0x7FL);
           if ((b & 0x80) == 0) {
             break;
           }
         }
         res ^= i;
       }
       return res;
     }
   
     @Benchmark
     public long readVLongMSBNew() {
       int pos = 0;
       long res = 0;
       for (int iter = 0; iter < size; iter++) {
         byte b = msbBytes[pos++];
         long i = b & 0x7FL;
         while (b < 0) {
           b = msbBytes[pos++];
           i = (i << 7) | (b & 0x7FL);
         }
         res ^= i;
       }
       return res;
     }
   
     static void writeMSBVLong(long l, DataOutput scratchBytes) throws 
IOException {
       assert l >= 0;
       // Keep zero bits on most significant byte to have more chance to get 
prefix bytes shared.
       // e.g. we expect 0x7FFF stored as [0x81, 0xFF, 0x7F] but not [0xFF, 
0xFF, 0x40]
       final int bytesNeeded = (Long.SIZE - Long.numberOfLeadingZeros(l) - 1) / 
7 + 1;
       l <<= Long.SIZE - bytesNeeded * 7;
       for (int i = 1; i < bytesNeeded; i++) {
         scratchBytes.writeByte((byte) (((l >>> 57) & 0x7FL) | 0x80));
         l = l << 7;
       }
       scratchBytes.writeByte((byte) (((l >>> 57) & 0x7FL)));
     }
   }
   
   ```
   </details>


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to