gf2121 commented on issue #12659: URL: https://github.com/apache/lucene/issues/12659#issuecomment-1759141669
I wrote a JMH benchmark to compare reading `vint` vs `msbvint`. (Note: in the results and code below, the benchmark named `writeVLongMSB` measures *reading* the MSB-first encoding; the name is a misnomer.) ``` Benchmark (size) (valueBit) Mode Cnt Score Error Units ReadVLongBenchmark.readVLong 128 7 thrpt 5 29.406 ± 0.148 ops/us ReadVLongBenchmark.readVLong 128 14 thrpt 5 3.644 ± 0.013 ops/us ReadVLongBenchmark.readVLong 128 21 thrpt 5 2.416 ± 0.004 ops/us ReadVLongBenchmark.readVLong 128 28 thrpt 5 2.074 ± 0.005 ops/us ReadVLongBenchmark.readVLong 128 35 thrpt 5 1.838 ± 0.001 ops/us ReadVLongBenchmark.readVLong 128 42 thrpt 5 1.958 ± 0.004 ops/us ReadVLongBenchmark.readVLong 128 49 thrpt 5 1.511 ± 0.012 ops/us ReadVLongBenchmark.readVLong 128 56 thrpt 5 1.463 ± 0.005 ops/us ReadVLongBenchmark.readVLong 128 63 thrpt 5 1.299 ± 0.092 ops/us ReadVLongBenchmark.writeVLongMSB 128 7 thrpt 5 28.622 ± 0.865 ops/us ReadVLongBenchmark.writeVLongMSB 128 14 thrpt 5 4.479 ± 0.013 ops/us ReadVLongBenchmark.writeVLongMSB 128 21 thrpt 5 3.647 ± 0.007 ops/us ReadVLongBenchmark.writeVLongMSB 128 28 thrpt 5 2.434 ± 0.372 ops/us ReadVLongBenchmark.writeVLongMSB 128 35 thrpt 5 1.885 ± 0.006 ops/us ReadVLongBenchmark.writeVLongMSB 128 42 thrpt 5 1.516 ± 0.005 ops/us ReadVLongBenchmark.writeVLongMSB 128 49 thrpt 5 1.271 ± 0.004 ops/us ReadVLongBenchmark.writeVLongMSB 128 56 thrpt 5 1.095 ± 0.003 ops/us ReadVLongBenchmark.writeVLongMSB 128 63 thrpt 5 0.961 ± 0.002 ops/us ``` <details> <summary>Code </summary> ``` package testing; import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.packed.PackedInts; import org.openjdk.jmh.annotations.*; import java.io.IOException; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; @BenchmarkMode(Mode.Throughput) @OutputTimeUnit(TimeUnit.MICROSECONDS) @State(Scope.Benchmark) @Warmup(iterations = 3, time = 3) @Measurement(iterations = 5, time = 3) @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"}) public class ReadVLongBenchmark { private byte[] bytes; private byte[] 
msbBytes; private int written; @Param({"128"}) int size; @Param({"7", "14", "21", "28", "35", "42", "49", "56", "63"}) int valueBit; @Setup(Level.Trial) public void init() throws Exception { bytes = new byte[size * (valueBit / 7)]; msbBytes = new byte[size * (valueBit / 7)]; ByteArrayDataOutput out = new ByteArrayDataOutput(bytes); ByteArrayDataOutput msbOut = new ByteArrayDataOutput(msbBytes); written = 0; final long min = (1L << valueBit - 1); final long max = valueBit == 63 ? Long.MAX_VALUE : (1L << valueBit) - 1; while (out.getPosition() < bytes.length - 10) { written++; long l = ThreadLocalRandom.current().nextLong(min, max); if (PackedInts.bitsRequired(l) != valueBit) { throw new IllegalStateException(); } out.writeVLong(l); writeMSBVLong(l, msbOut); } if (readVLong() != writeVLongMSB()) { throw new IllegalStateException(); } } @Benchmark public long readVLong() { int pos = 0; long res = 0; for (int iter = 0; iter < written; iter++) { byte b = bytes[pos++]; long i = b & 0x7F; for (int shift = 7; (b & 0x80) != 0; shift += 7) { b = bytes[pos++]; i |= (b & 0x7FL) << shift; } res ^= i; } return res; } @Benchmark public long writeVLongMSB() { int pos = 0; long res = 0; for (int iter = 0; iter < written; iter++) { long i = 0L; while (true) { byte b = msbBytes[pos++]; i = (i << 7) | (b & 0x7FL); if ((b & 0x80) == 0) { break; } } res ^= i; } return res; } static void writeMSBVLong(long l, DataOutput scratchBytes) throws IOException { assert l >= 0; // Keep zero bits on most significant byte to have more chance to get prefix bytes shared. // e.g. 
we expect 0x7FFF stored as [0x81, 0xFF, 0x7F] but not [0xFF, 0xFF, 0x40] final int bytesNeeded = (Long.SIZE - Long.numberOfLeadingZeros(l) - 1) / 7 + 1; l <<= Long.SIZE - bytesNeeded * 7; for (int i = 1; i < bytesNeeded; i++) { scratchBytes.writeByte((byte) (((l >>> 57) & 0x7FL) | 0x80)); l = l << 7; } scratchBytes.writeByte((byte) (((l >>> 57) & 0x7FL))); } } ``` </details> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org