This is an automated email from the ASF dual-hosted git repository.
dmollitor pushed a commit to branch branch-1.12
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/branch-1.12 by this push:
new aa424484c AVRO-4061: Use Default Value of 1 For UTF8 Hash (#3176)
aa424484c is described below
commit aa424484cc15f00a1f20fe666bd76afd995fa82f
Author: belugabehr <[email protected]>
AuthorDate: Fri Sep 27 10:30:39 2024 -0400
AVRO-4061: Use Default Value of 1 For UTF8 Hash (#3176)
---
.../main/java/org/apache/avro/io/BinaryData.java | 12 +++----
.../src/main/java/org/apache/avro/util/Utf8.java | 4 ++-
.../test/java/org/apache/avro/util/TestUtf8.java | 37 +++++++++++-----------
3 files changed, 28 insertions(+), 25 deletions(-)
diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
index f925bcd96..99bc0b2ac 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java
@@ -263,7 +263,7 @@ public class BinaryData {
case FIXED:
return hashBytes(1, data, schema.getFixedSize(), false);
case STRING:
- return hashBytes(0, data, decoder.readInt(), false);
+ return hashBytes(1, data, decoder.readInt(), false);
case BYTES:
return hashBytes(1, data, decoder.readInt(), true);
case NULL:
@@ -298,7 +298,7 @@ public class BinaryData {
/**
* Encode a boolean to the byte array at the given position. Will throw
* IndexOutOfBounds if the position is not valid.
- *
+ *
* @return The number of bytes written to the buffer, 1.
*/
public static int encodeBoolean(boolean b, byte[] buf, int pos) {
@@ -310,7 +310,7 @@ public class BinaryData {
* Encode an integer to the byte array at the given position. Will throw
* IndexOutOfBounds if it overflows. Users should ensure that there are at least
* 5 bytes left in the buffer before calling this method.
- *
+ *
* @return The number of bytes written to the buffer, between 1 and 5.
*/
public static int encodeInt(int n, byte[] buf, int pos) {
@@ -341,7 +341,7 @@ public class BinaryData {
* Encode a long to the byte array at the given position. Will throw
* IndexOutOfBounds if it overflows. Users should ensure that there are at least
* 10 bytes left in the buffer before calling this method.
- *
+ *
* @return The number of bytes written to the buffer, between 1 and 10.
*/
public static int encodeLong(long n, byte[] buf, int pos) {
@@ -392,7 +392,7 @@ public class BinaryData {
* Encode a float to the byte array at the given position. Will throw
* IndexOutOfBounds if it overflows. Users should ensure that there are at least
* 4 bytes left in the buffer before calling this method.
- *
+ *
* @return Returns the number of bytes written to the buffer, 4.
*/
public static int encodeFloat(float f, byte[] buf, int pos) {
@@ -408,7 +408,7 @@ public class BinaryData {
* Encode a double to the byte array at the given position. Will throw
* IndexOutOfBounds if it overflows. Users should ensure that there are at least
* 8 bytes left in the buffer before calling this method.
- *
+ *
* @return Returns the number of bytes written to the buffer, 8.
*/
public static int encodeDouble(double d, byte[] buf, int pos) {
diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
index 9238fd78c..c83770876 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
@@ -42,7 +42,8 @@ public class Utf8 implements Comparable<Utf8>, CharSequence, Externalizable {
private String string;
public Utf8() {
- bytes = EMPTY;
+ this.bytes = EMPTY;
+ this.hash = 1;
}
public Utf8(String string) {
@@ -171,6 +172,7 @@ public class Utf8 implements Comparable<Utf8>, CharSequence, Externalizable {
if (h == 0) {
byte[] bytes = this.bytes;
int length = this.length;
+ h = 1;
for (int i = 0; i < length; i++) {
h = h * 31 + bytes[i];
}
diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
index e0977ff9f..91618ca5e 100644
--- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
+++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
@@ -59,43 +59,44 @@ public class TestUtf8 {
@Test
void hashCodeReused() {
- assertEquals(97, new Utf8("a").hashCode());
- assertEquals(3904, new Utf8("zz").hashCode());
- assertEquals(122, new Utf8("z").hashCode());
- assertEquals(99162322, new Utf8("hello").hashCode());
- assertEquals(3198781, new Utf8("hell").hashCode());
+ assertEquals(1, new Utf8().hashCode());
+ assertEquals(128, new Utf8("a").hashCode());
+ assertEquals(4865, new Utf8("zz").hashCode());
+ assertEquals(153, new Utf8("z").hashCode());
+ assertEquals(127791473, new Utf8("hello").hashCode());
+ assertEquals(4122302, new Utf8("hell").hashCode());
Utf8 u = new Utf8("a");
- assertEquals(97, u.hashCode());
- assertEquals(97, u.hashCode());
+ assertEquals(128, u.hashCode());
+ assertEquals(128, u.hashCode());
u.set("a");
- assertEquals(97, u.hashCode());
+ assertEquals(128, u.hashCode());
u.setByteLength(1);
- assertEquals(97, u.hashCode());
+ assertEquals(128, u.hashCode());
u.setByteLength(2);
- assertNotEquals(97, u.hashCode());
+ assertNotEquals(128, u.hashCode());
u.set("zz");
- assertEquals(3904, u.hashCode());
+ assertEquals(4865, u.hashCode());
u.setByteLength(1);
- assertEquals(122, u.hashCode());
+ assertEquals(153, u.hashCode());
u.set("hello");
- assertEquals(99162322, u.hashCode());
+ assertEquals(127791473, u.hashCode());
u.setByteLength(4);
- assertEquals(3198781, u.hashCode());
+ assertEquals(4122302, u.hashCode());
u.set(new Utf8("zz"));
- assertEquals(3904, u.hashCode());
+ assertEquals(4865, u.hashCode());
u.setByteLength(1);
- assertEquals(122, u.hashCode());
+ assertEquals(153, u.hashCode());
u.set(new Utf8("hello"));
- assertEquals(99162322, u.hashCode());
+ assertEquals(127791473, u.hashCode());
u.setByteLength(4);
- assertEquals(3198781, u.hashCode());
+ assertEquals(4122302, u.hashCode());
}
@Test