This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 87b8f2b3a7ab [SPARK-51664][SQL] Support the TIME data type in the Hash expression
87b8f2b3a7ab is described below

commit 87b8f2b3a7ab8021c6aeb94fe56326b296b87c98
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Mon Mar 31 11:09:27 2025 +0300

    [SPARK-51664][SQL] Support the TIME data type in the Hash expression

    ### What changes were proposed in this pull request?
    In this PR, I propose to support the new data type TIME in `HashExpression`, generating the hash in the same way as for the underlying type `Long`.

    ### Why are the changes needed?
    The affected expressions are used in core components of Spark SQL:
    - Murmur3Hash: generating partition IDs,
    - HiveHash: bucketing,
    - XxHash64: Bloom filters.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    By running the new test:
    ```
    $ build/sbt "test:testOnly *HashExpressionsSuite"
    ```

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #50456 from MaxGekk/time-hash-expr.

    Lead-authored-by: Max Gekk <max.g...@gmail.com>
    Co-authored-by: Maxim Gekk <max.g...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../scala/org/apache/spark/sql/catalyst/expressions/hash.scala | 4 +++-
 .../spark/sql/catalyst/expressions/HashExpressionsSuite.scala  | 9 ++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
index ac493d19df1b..7cb645e601d3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala
@@ -256,6 +256,8 @@ case class Crc32(child: Expression)
  *             input with seed.
  *  - binary: use murmur3 to hash the bytes with seed.
  *  - string: get the bytes of string and hash it.
+ *  - time: it stores the long value of `microseconds` since midnight; use
+ *          murmur3 to hash the long input with seed.
  *  - array: The `result` starts with seed, then use `result` as seed, recursively
  *           calculate hash value for each element, and assign the element hash
  *           value to `result`.
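To make the doc comment above concrete, here is a minimal, self-contained Scala sketch (not part of the patch; `TimeHashSketch` and `toMicrosOfDay` are hypothetical names introduced only for this illustration) showing the `Long` that the hash functions actually see for a TIME value:

```scala
import java.time.LocalTime

// Illustrative only: per the doc comment above, a TIME value is stored as a
// Long counting microseconds since midnight, so hashing a TIME reduces to
// hashing that Long. `toNanoOfDay` is standard JDK API; the rest is a
// hypothetical helper for this sketch.
object TimeHashSketch {
  def toMicrosOfDay(t: LocalTime): Long = t.toNanoOfDay / 1000L

  def main(args: Array[String]): Unit = {
    // The same value that the new test below uses.
    val t = LocalTime.of(23, 50, 59, 123456000)
    println(toMicrosOfDay(t)) // prints 85859123456 (microseconds since midnight)
  }
}
```

Given that representation, the one-line dispatch change below is enough: `TimeType` simply rides the existing `genHashLong` path already used for `LongType`.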
@@ -507,7 +509,7 @@ abstract class HashExpression[E] extends Expression {
     case NullType => ""
     case BooleanType => genHashBoolean(input, result)
     case ByteType | ShortType | IntegerType | DateType => genHashInt(input, result)
-    case LongType => genHashLong(input, result)
+    case LongType | _: TimeType => genHashLong(input, result)
     case TimestampType | TimestampNTZType => genHashTimestamp(input, result)
     case FloatType => genHashFloat(input, result)
     case DoubleType => genHashDouble(input, result)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
index 019c953a3b0a..dddc33aa4358 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala
@@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.expressions
 
 import java.nio.charset.StandardCharsets
-import java.time.{Duration, Period, ZoneId, ZoneOffset}
+import java.time.{Duration, LocalTime, Period, ZoneId, ZoneOffset}
 
 import scala.collection.mutable.ArrayBuffer
 import scala.language.implicitConversions
@@ -754,6 +754,13 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkResult(Literal.create(-0F, FloatType), Literal.create(0F, FloatType))
   }
 
+  test("Support TimeType") {
+    val time = Literal.create(LocalTime.of(23, 50, 59, 123456000), TimeType())
+    checkEvaluation(Murmur3Hash(Seq(time), 10), 258472763)
+    checkEvaluation(XxHash64(Seq(time), 10), -9197489935839400467L)
+    checkEvaluation(HiveHash(Seq(time)), -40222445)
+  }
+
   private def testHash(inputSchema: StructType): Unit = {
     val inputGenerator = RandomDataGenerator.forType(inputSchema, nullable = false).get
     val toRow = ExpressionEncoder(inputSchema).createSerializer()

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org