This is an automated email from the ASF dual-hosted git repository.

dmollitor pushed a commit to branch branch-1.12
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/branch-1.12 by this push:
     new aa424484c AVRO-4061: Use Default Value of 1 For UTF8 Hash (#3176)
aa424484c is described below

commit aa424484cc15f00a1f20fe666bd76afd995fa82f
Author: belugabehr <[email protected]>
AuthorDate: Fri Sep 27 10:30:39 2024 -0400

    AVRO-4061: Use Default Value of 1 For UTF8 Hash (#3176)
---
 .../main/java/org/apache/avro/io/BinaryData.java   | 12 +++----
 .../src/main/java/org/apache/avro/util/Utf8.java   |  4 ++-
 .../test/java/org/apache/avro/util/TestUtf8.java   | 37 +++++++++++-----------
 3 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
index f925bcd96..99bc0b2ac 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
@@ -263,7 +263,7 @@ public class BinaryData {
     case FIXED:
       return hashBytes(1, data, schema.getFixedSize(), false);
     case STRING:
-      return hashBytes(0, data, decoder.readInt(), false);
+      return hashBytes(1, data, decoder.readInt(), false);
     case BYTES:
       return hashBytes(1, data, decoder.readInt(), true);
     case NULL:
@@ -298,7 +298,7 @@ public class BinaryData {
   /**
    * Encode a boolean to the byte array at the given position. Will throw
    * IndexOutOfBounds if the position is not valid.
-   * 
+   *
    * @return The number of bytes written to the buffer, 1.
    */
   public static int encodeBoolean(boolean b, byte[] buf, int pos) {
@@ -310,7 +310,7 @@ public class BinaryData {
    * Encode an integer to the byte array at the given position. Will throw
    * IndexOutOfBounds if it overflows. Users should ensure that there are at least
    * 5 bytes left in the buffer before calling this method.
-   * 
+   *
    * @return The number of bytes written to the buffer, between 1 and 5.
    */
   public static int encodeInt(int n, byte[] buf, int pos) {
@@ -341,7 +341,7 @@ public class BinaryData {
    * Encode a long to the byte array at the given position. Will throw
    * IndexOutOfBounds if it overflows. Users should ensure that there are at least
    * 10 bytes left in the buffer before calling this method.
-   * 
+   *
    * @return The number of bytes written to the buffer, between 1 and 10.
    */
   public static int encodeLong(long n, byte[] buf, int pos) {
@@ -392,7 +392,7 @@ public class BinaryData {
    * Encode a float to the byte array at the given position. Will throw
    * IndexOutOfBounds if it overflows. Users should ensure that there are at least
    * 4 bytes left in the buffer before calling this method.
-   * 
+   *
    * @return Returns the number of bytes written to the buffer, 4.
    */
   public static int encodeFloat(float f, byte[] buf, int pos) {
@@ -408,7 +408,7 @@ public class BinaryData {
    * Encode a double to the byte array at the given position. Will throw
    * IndexOutOfBounds if it overflows. Users should ensure that there are at least
    * 8 bytes left in the buffer before calling this method.
-   * 
+   *
    * @return Returns the number of bytes written to the buffer, 8.
    */
   public static int encodeDouble(double d, byte[] buf, int pos) {
diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index 9238fd78c..c83770876 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -42,7 +42,8 @@ public class Utf8 implements Comparable<Utf8>, CharSequence, Externalizable {
   private String string;
 
   public Utf8() {
-    bytes = EMPTY;
+    this.bytes = EMPTY;
+    this.hash = 1;
   }
 
   public Utf8(String string) {
@@ -171,6 +172,7 @@ public class Utf8 implements Comparable<Utf8>, CharSequence, Externalizable {
     if (h == 0) {
       byte[] bytes = this.bytes;
       int length = this.length;
+      h = 1;
       for (int i = 0; i < length; i++) {
         h = h * 31 + bytes[i];
       }
diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
index e0977ff9f..91618ca5e 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
@@ -59,43 +59,44 @@ public class TestUtf8 {
 
   @Test
   void hashCodeReused() {
-    assertEquals(97, new Utf8("a").hashCode());
-    assertEquals(3904, new Utf8("zz").hashCode());
-    assertEquals(122, new Utf8("z").hashCode());
-    assertEquals(99162322, new Utf8("hello").hashCode());
-    assertEquals(3198781, new Utf8("hell").hashCode());
+    assertEquals(1, new Utf8().hashCode());
+    assertEquals(128, new Utf8("a").hashCode());
+    assertEquals(4865, new Utf8("zz").hashCode());
+    assertEquals(153, new Utf8("z").hashCode());
+    assertEquals(127791473, new Utf8("hello").hashCode());
+    assertEquals(4122302, new Utf8("hell").hashCode());
 
     Utf8 u = new Utf8("a");
-    assertEquals(97, u.hashCode());
-    assertEquals(97, u.hashCode());
+    assertEquals(128, u.hashCode());
+    assertEquals(128, u.hashCode());
 
     u.set("a");
-    assertEquals(97, u.hashCode());
+    assertEquals(128, u.hashCode());
 
     u.setByteLength(1);
-    assertEquals(97, u.hashCode());
+    assertEquals(128, u.hashCode());
     u.setByteLength(2);
-    assertNotEquals(97, u.hashCode());
+    assertNotEquals(128, u.hashCode());
 
     u.set("zz");
-    assertEquals(3904, u.hashCode());
+    assertEquals(4865, u.hashCode());
     u.setByteLength(1);
-    assertEquals(122, u.hashCode());
+    assertEquals(153, u.hashCode());
 
     u.set("hello");
-    assertEquals(99162322, u.hashCode());
+    assertEquals(127791473, u.hashCode());
     u.setByteLength(4);
-    assertEquals(3198781, u.hashCode());
+    assertEquals(4122302, u.hashCode());
 
     u.set(new Utf8("zz"));
-    assertEquals(3904, u.hashCode());
+    assertEquals(4865, u.hashCode());
     u.setByteLength(1);
-    assertEquals(122, u.hashCode());
+    assertEquals(153, u.hashCode());
 
     u.set(new Utf8("hello"));
-    assertEquals(99162322, u.hashCode());
+    assertEquals(127791473, u.hashCode());
     u.setByteLength(4);
-    assertEquals(3198781, u.hashCode());
+    assertEquals(4122302, u.hashCode());
   }
 
   @Test

Reply via email to