This is an automated email from the ASF dual-hosted git repository.
cloud-fan pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.x by this push:
new 25d161fbfa13 [SPARK-50610][SQL] Fix decimal precision in
HiveInspectors.toInspector
25d161fbfa13 is described below
commit 25d161fbfa13f87be3cf2c702f9c21dad1c8a3fa
Author: Shrirang Mhalgi <[email protected]>
AuthorDate: Tue May 19 12:51:13 2026 +0800
[SPARK-50610][SQL] Fix decimal precision in HiveInspectors.toInspector
### What changes were proposed in this pull request?
Fix `toInspector(dataType: DataType)` and `toInspector(expr: Expression)`
in `HiveInspectors` to preserve the actual decimal precision and scale instead of
always using Hive's default precision and scale, `(38, 18)`.
Column-reference path (`toInspector(dataType)`): The existing code had a
`// TODO decimal precision?` comment and always returned
`javaHiveDecimalObjectInspector`. This change replaces it with
`getPrimitiveJavaObjectInspector(new DecimalTypeInfo(precision, scale))`.
Literal-expression path (`toInspector(expr)`):
`getDecimalWritableConstantObjectInspector` hardcoded
`TypeInfoFactory.decimalTypeInfo` (the `(38, 18)` singleton). It now receives the
precision and scale extracted from the literal's `DecimalType` and builds a
`DecimalTypeInfo` from them.
Both fixes align with the existing `typeInfoConversions` precedent in the
same file, which already uses `new DecimalTypeInfo(precision, scale)`.
### Why are the changes needed?
When a Hive UDF (e.g., `mask`) operates on a `Decimal(18, 10)` column or
receives a decimal literal argument, the result incorrectly has scale 18
instead of 10, because the inspector always reports `(38, 18)` regardless of the
actual type. The fix is generic: it correctly handles all decimal precisions
and scales, not just a specific case.
### Does this PR introduce _any_ user-facing change?
Yes. Hive UDFs operating on decimal columns and decimal literal arguments
will now return results with the correct precision and scale, matching the
input type.
### How was this patch tested?
Added tests in `HiveInspectorSuite` verifying that `toInspector(dataType)`
preserves precision/scale for `DecimalType(18, 10)`, `DecimalType(10, 2)`, and
`DecimalType.SYSTEM_DEFAULT`, and that `toInspector(expr)` preserves
precision/scale for a `DecimalType(10, 2)` literal and a null
`DecimalType(18, 10)` literal.
### Was this patch authored or co-authored using generative AI tooling?
Yes.
Closes #55668 from shrirangmhalgi/SPARK-50610-decimal-precision.
Authored-by: Shrirang Mhalgi <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit dd4c29c2d9745a0bb4542ba220f470176c7d402b)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../org/apache/spark/sql/hive/HiveInspectors.scala | 14 ++++----
.../apache/spark/sql/hive/HiveInspectorSuite.scala | 42 +++++++++++++++++++++-
2 files changed, 49 insertions(+), 7 deletions(-)
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index 087f68ce3c79..bf40327bd991 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -839,8 +839,9 @@ private[hive] trait HiveInspectors {
PrimitiveObjectInspectorFactory.javaHiveIntervalDayTimeObjectInspector
case _: YearMonthIntervalType =>
PrimitiveObjectInspectorFactory.javaHiveIntervalYearMonthObjectInspector
- // TODO decimal precision?
- case DecimalType() =>
PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector
+ case DecimalType.Fixed(precision, scale) =>
+ PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
+ new DecimalTypeInfo(precision, scale))
case StructType(fields) =>
ObjectInspectorFactory.getStandardStructObjectInspector(
java.util.Arrays.asList(fields.map(f => f.name) : _*),
@@ -880,8 +881,8 @@ private[hive] trait HiveInspectors {
getDateWritableConstantObjectInspector(value)
case Literal(value, TimestampType) =>
getTimestampWritableConstantObjectInspector(value)
- case Literal(value, DecimalType()) =>
- getDecimalWritableConstantObjectInspector(value)
+ case Literal(value, DecimalType.Fixed(precision, scale)) =>
+ getDecimalWritableConstantObjectInspector(value, precision, scale)
case Literal(_, NullType) =>
getPrimitiveNullWritableConstantObjectInspector
case Literal(_, _: DayTimeIntervalType) =>
@@ -1035,9 +1036,10 @@ private[hive] trait HiveInspectors {
PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
TypeInfoFactory.timestampTypeInfo, getTimestampWritable(value))
- private def getDecimalWritableConstantObjectInspector(value: Any):
ObjectInspector =
+ private def getDecimalWritableConstantObjectInspector(
+ value: Any, precision: Int, scale: Int): ObjectInspector =
PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
- TypeInfoFactory.decimalTypeInfo, getDecimalWritable(value))
+ new DecimalTypeInfo(precision, scale), getDecimalWritable(value))
private def getPrimitiveNullWritableConstantObjectInspector: ObjectInspector
=
PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
index 13c48f38e7f7..8acabd579d44 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
@@ -21,9 +21,10 @@ import java.util
import org.apache.hadoop.hive.ql.udf.UDAFPercentile
import org.apache.hadoop.hive.serde2.io.DoubleWritable
-import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector,
ObjectInspectorFactory, StructObjectInspector}
+import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector,
ObjectInspectorFactory, PrimitiveObjectInspector, StructObjectInspector}
import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions
import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo
import org.apache.hadoop.io.LongWritable
import org.apache.spark.SparkFunSuite
@@ -251,4 +252,43 @@ class HiveInspectorSuite extends SparkFunSuite with
HiveInspectors {
unwrap(wrap(null, toInspector(Literal.create(d, dt)), dt),
toInspector(Literal.create(d, dt))))
}
+
+ test("SPARK-50610: toInspector(dataType) should preserve decimal precision
and scale") {
+ val dt = DecimalType(18, 10)
+ val oi = toInspector(dt).asInstanceOf[PrimitiveObjectInspector]
+ val typeInfo = oi.getTypeInfo.asInstanceOf[DecimalTypeInfo]
+ assert(typeInfo.precision() === 18)
+ assert(typeInfo.scale() === 10)
+
+ // Also verify non-default precision/scale combinations
+ val dt2 = DecimalType(10, 2)
+ val oi2 = toInspector(dt2).asInstanceOf[PrimitiveObjectInspector]
+ val typeInfo2 = oi2.getTypeInfo.asInstanceOf[DecimalTypeInfo]
+ assert(typeInfo2.precision() === 10)
+ assert(typeInfo2.scale() === 2)
+
+ // Verify the default DecimalType also works
+ val dt3 = DecimalType.SYSTEM_DEFAULT
+ val oi3 = toInspector(dt3).asInstanceOf[PrimitiveObjectInspector]
+ val typeInfo3 = oi3.getTypeInfo.asInstanceOf[DecimalTypeInfo]
+ assert(typeInfo3.precision() === DecimalType.MAX_PRECISION)
+ assert(typeInfo3.scale() === DecimalType.DEFAULT_SCALE)
+ }
+
+ test("SPARK-50610: toInspector(expr) should preserve decimal precision and
scale for literals") {
+ val decimal = Decimal(BigDecimal("123.45"))
+ val dt = DecimalType(10, 2)
+ val literal = Literal.create(decimal, dt)
+ val oi = toInspector(literal).asInstanceOf[PrimitiveObjectInspector]
+ val typeInfo = oi.getTypeInfo.asInstanceOf[DecimalTypeInfo]
+ assert(typeInfo.precision() === 10)
+ assert(typeInfo.scale() === 2)
+
+ // Null literal should still preserve type info
+ val nullLiteral = Literal.create(null, DecimalType(18, 10))
+ val oi2 = toInspector(nullLiteral).asInstanceOf[PrimitiveObjectInspector]
+ val typeInfo2 = oi2.getTypeInfo.asInstanceOf[DecimalTypeInfo]
+ assert(typeInfo2.precision() === 18)
+ assert(typeInfo2.scale() === 10)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]