This is an automated email from the ASF dual-hosted git repository.

dmollitor pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/main by this push:
     new 11ca5da73 AVRO-4067: Optimize First Byte of Long Decode (#3183)
11ca5da73 is described below

commit 11ca5da73cd16aef52b55f0dce814420a7403caa
Author: belugabehr <[email protected]>
AuthorDate: Sat Mar 8 13:27:17 2025 -0500

    AVRO-4067: Optimize First Byte of Long Decode (#3183)
---
 .../java/org/apache/avro/io/BinaryDecoder.java     | 23 +++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
index 865bc9c06..2f9eccd00 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
@@ -184,10 +184,25 @@ public class BinaryDecoder extends Decoder {
   @Override
   public long readLong() throws IOException {
     ensureBounds(10);
-    int b = buf[pos++] & 0xff;
-    int n = b & 0x7f;
+
+    /*
+     * Long values are used for many different areas of the spec, for example: a
+     * string is encoded as a long followed by that many bytes of UTF-8 encoded
+     * character data. Because of this, long values actually tend to be pretty small
+     * on average, and so can often fit within the first byte of the variable-length
+     * array. Therefore, the first byte is prioritized. For the first byte, if the
+     * high-order bit is set, this indicates there are more bytes to read, but also
+     * this means a signed value >= 0 does not have any following bytes.
+     */
     long l;
-    if (b > 0x7f) {
+    int b, n;
+    if ((b = buf[pos++]) == 0) {
+      return 0;
+    } else if (b > 0) {
+      // back to two's-complement (zig-zag)
+      return (b >>> 1) ^ -(b & 1);
+    } else {
+      n = b & 0x7f;
       b = buf[pos++] & 0xff;
       n ^= (b & 0x7f) << 7;
       if (b > 0x7f) {
@@ -209,8 +224,6 @@ public class BinaryDecoder extends Decoder {
       } else {
         l = n;
       }
-    } else {
-      l = n;
     }
     if (pos > limit) {
       throw new EOFException();

Reply via email to