This is an automated email from the ASF dual-hosted git repository.
ptoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new fd02372a3955 [SPARK-53804][SQL] Support TIME radix sort
fd02372a3955 is described below
commit fd02372a39557b57d7b47af14375a3caebb71a21
Author: Bruce Robbins <[email protected]>
AuthorDate: Tue Oct 7 17:58:02 2025 +0200
[SPARK-53804][SQL] Support TIME radix sort
### What changes were proposed in this pull request?
Add support for radix sort of the Time type.
This PR is based on #35279, which added similar support for TimestampNTZ.
### Why are the changes needed?
Radix sort gives better performance when sorting by a single Time type column.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
New test.
Also, I added temporary debug statements to `SortExec` in two working copies,
one with this change and one without it, then ran a join on a Time type
column in each. With the change, `canUseRadixSort` = true. Without the
change, `canUseRadixSort` = false.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #52520 from bersprockets/time_sortprefix.
Authored-by: Bruce Robbins <[email protected]>
Signed-off-by: Peter Toth <[email protected]>
---
.../scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala | 6 +++---
.../spark/sql/catalyst/expressions/SortOrderExpressionsSuite.scala | 6 ++++++
.../main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala | 5 +++--
3 files changed, 12 insertions(+), 5 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
index 824024a84cba..166866c90b87 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala
@@ -128,7 +128,7 @@ object SortOrder {
case class SortPrefix(child: SortOrder) extends UnaryExpression {
val nullValue = child.child.dataType match {
- case BooleanType | DateType | TimestampType | TimestampNTZType |
+ case BooleanType | DateType | TimestampType | TimestampNTZType | _:
TimeType |
_: IntegralType | _: AnsiIntervalType =>
if (nullAsSmallest) Long.MinValue else Long.MaxValue
case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS
=>
@@ -151,7 +151,7 @@ case class SortPrefix(child: SortOrder) extends
UnaryExpression {
private lazy val calcPrefix: Any => Long = child.child.dataType match {
case BooleanType => (raw) =>
if (raw.asInstanceOf[Boolean]) 1 else 0
- case DateType | TimestampType | TimestampNTZType |
+ case DateType | TimestampType | TimestampNTZType | _: TimeType |
_: IntegralType | _: AnsiIntervalType => (raw) =>
raw.asInstanceOf[java.lang.Number].longValue()
case FloatType | DoubleType => (raw) => {
@@ -202,7 +202,7 @@ case class SortPrefix(child: SortOrder) extends
UnaryExpression {
s"$input ? 1L : 0L"
case _: IntegralType =>
s"(long) $input"
- case DateType | TimestampType | TimestampNTZType | _: AnsiIntervalType =>
+ case DateType | TimestampType | TimestampNTZType | _: TimeType | _:
AnsiIntervalType =>
s"(long) $input"
case FloatType | DoubleType =>
s"$DoublePrefixCmp.computePrefix((double)$input)"
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SortOrderExpressionsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SortOrderExpressionsSuite.scala
index 9332ef559532..80bb16d72f6f 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SortOrderExpressionsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SortOrderExpressionsSuite.scala
@@ -18,9 +18,11 @@
package org.apache.spark.sql.catalyst.expressions
import java.sql.Timestamp
+import java.time.LocalTime
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch
+import org.apache.spark.sql.catalyst.util.SparkDateTimeUtils
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.util.collection.unsafe.sort.PrefixComparators._
@@ -51,6 +53,9 @@ class SortOrderExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper
val dec3 = Literal(Decimal(20132983L, 21, 2))
val list1 = Literal.create(Seq(1, 2), ArrayType(IntegerType))
val nullVal = Literal.create(null, IntegerType)
+ val tm1LocalTime = LocalTime.of(21, 15, 1, 123456)
+ val tm1Nano = SparkDateTimeUtils.localTimeToNanos(tm1LocalTime)
+ val tm1 = Literal.create(tm1LocalTime, TimeType(6))
checkEvaluation(SortPrefix(SortOrder(b1, Ascending)), 0L)
checkEvaluation(SortPrefix(SortOrder(b2, Ascending)), 1L)
@@ -83,6 +88,7 @@ class SortOrderExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper
DoublePrefixComparator.computePrefix(201329.83d))
checkEvaluation(SortPrefix(SortOrder(list1, Ascending)), 0L)
checkEvaluation(SortPrefix(SortOrder(nullVal, Ascending)), null)
+ checkEvaluation(SortPrefix(SortOrder(tm1, Ascending)), tm1Nano)
}
test("Cannot sort map type") {
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
index 4b561b813067..7332bbcb1845 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SortPrefixUtils.scala
@@ -43,7 +43,7 @@ object SortPrefixUtils {
case StringType => stringPrefixComparator(sortOrder)
case BinaryType => binaryPrefixComparator(sortOrder)
case BooleanType | ByteType | ShortType | IntegerType | LongType |
DateType | TimestampType |
- TimestampNTZType | _: AnsiIntervalType =>
+ TimestampNTZType | _: TimeType |_: AnsiIntervalType =>
longPrefixComparator(sortOrder)
case dt: DecimalType if dt.precision - dt.scale <=
Decimal.MAX_LONG_DIGITS =>
longPrefixComparator(sortOrder)
@@ -123,7 +123,8 @@ object SortPrefixUtils {
def canSortFullyWithPrefix(sortOrder: SortOrder): Boolean = {
sortOrder.dataType match {
case BooleanType | ByteType | ShortType | IntegerType | LongType |
DateType |
- TimestampType | TimestampNTZType | FloatType | DoubleType | _:
AnsiIntervalType =>
+ TimestampType | TimestampNTZType | _: TimeType | FloatType |
DoubleType |
+ _: AnsiIntervalType =>
true
case dt: DecimalType if dt.precision <= Decimal.MAX_LONG_DIGITS =>
true
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]