This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 4c961256da Fix decimal log precision for non-power values (#20433)
4c961256da is described below
commit 4c961256da23afe4c48ba931262758d4eab529a8
Author: Kumar Ujjawal <[email protected]>
AuthorDate: Tue Mar 17 11:41:17 2026 +0530
Fix decimal log precision for non-power values (#20433)
## Which issue does this PR close?
<!--
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax. For example
`Closes #123` indicates that this PR will close issue #123.
-->
- Closes #18524
## Rationale for this change
The decimal log implementation used the integer ilog path whenever the
base was an integer, which floors results for non‑power values. That
produced incorrect outputs such as log(2, 10^35) returning 116 instead
of 116.267...
<!--
Why are you proposing this change? If this is already explained clearly
in the issue then this section is not needed.
Explaining clearly why changes are proposed helps reviewers understand
your changes and offer better suggestions for fixes.
-->
## What changes are included in this PR?
- Fix decimal log to use the integer fast path only for exact powers,
preserving fractional results for non-power inputs.
- Update unit tests and decimal sqllogictest expectations to match the
corrected behav
<!--
There is no need to duplicate the description in the issue here but it
is sometimes worth providing a summary of the individual changes in this
PR.
-->
## Are these changes tested?
Yes
<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code
If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?
-->
## Are there any user-facing changes?
No
<!--
If there are user-facing changes then we may require documentation to be
updated before approving the PR.
-->
<!--
If there are any breaking changes to public APIs, please add the `api
change` label.
-->
---
datafusion/functions/src/math/log.rs | 107 ++++++++++---------------
datafusion/sqllogictest/test_files/decimal.slt | 20 +++--
2 files changed, 51 insertions(+), 76 deletions(-)
diff --git a/datafusion/functions/src/math/log.rs
b/datafusion/functions/src/math/log.rs
index d1906a4bf0..3db24d67e8 100644
--- a/datafusion/functions/src/math/log.rs
+++ b/datafusion/functions/src/math/log.rs
@@ -109,80 +109,59 @@ fn is_valid_integer_base(base: f64) -> bool {
}
/// Calculate logarithm for Decimal32 values.
-/// For integer bases >= 2 with non-negative scale, uses the efficient u32
ilog algorithm.
-/// Otherwise falls back to f64 computation.
+/// For integer bases >= 2 with zero scale, return an exact integer log when
the
+/// value is a perfect power of the base. Otherwise falls back to f64
computation.
fn log_decimal32(value: i32, scale: i8, base: f64) -> Result<f64, ArrowError> {
- if is_valid_integer_base(base)
- && scale >= 0
- && let Some(unscaled) = unscale_to_u32(value, scale)
+ if scale == 0
+ && is_valid_integer_base(base)
+ && let Ok(unscaled) = u32::try_from(value)
+ && unscaled > 0
{
- return if unscaled > 0 {
- Ok(unscaled.ilog(base as u32) as f64)
- } else {
- Ok(f64::NAN)
- };
+ let base_u32 = base as u32;
+ let int_log = unscaled.ilog(base_u32);
+ if base_u32.checked_pow(int_log) == Some(unscaled) {
+ return Ok(int_log as f64);
+ }
}
decimal_to_f64(value, scale).map(|v| v.log(base))
}
/// Calculate logarithm for Decimal64 values.
-/// For integer bases >= 2 with non-negative scale, uses the efficient u64
ilog algorithm.
-/// Otherwise falls back to f64 computation.
+/// For integer bases >= 2 with zero scale, return an exact integer log when
the
+/// value is a perfect power of the base. Otherwise falls back to f64
computation.
fn log_decimal64(value: i64, scale: i8, base: f64) -> Result<f64, ArrowError> {
- if is_valid_integer_base(base)
- && scale >= 0
- && let Some(unscaled) = unscale_to_u64(value, scale)
+ if scale == 0
+ && is_valid_integer_base(base)
+ && let Ok(unscaled) = u64::try_from(value)
+ && unscaled > 0
{
- return if unscaled > 0 {
- Ok(unscaled.ilog(base as u64) as f64)
- } else {
- Ok(f64::NAN)
- };
+ let base_u64 = base as u64;
+ let int_log = unscaled.ilog(base_u64);
+ if base_u64.checked_pow(int_log) == Some(unscaled) {
+ return Ok(int_log as f64);
+ }
}
decimal_to_f64(value, scale).map(|v| v.log(base))
}
/// Calculate logarithm for Decimal128 values.
-/// For integer bases >= 2 with non-negative scale, uses the efficient u128
ilog algorithm.
-/// Otherwise falls back to f64 computation.
+/// For integer bases >= 2 with zero scale, return an exact integer log when
the
+/// value is a perfect power of the base. Otherwise falls back to f64
computation.
fn log_decimal128(value: i128, scale: i8, base: f64) -> Result<f64,
ArrowError> {
- if is_valid_integer_base(base)
- && scale >= 0
- && let Some(unscaled) = unscale_to_u128(value, scale)
+ if scale == 0
+ && is_valid_integer_base(base)
+ && let Ok(unscaled) = u128::try_from(value)
+ && unscaled > 0
{
- return if unscaled > 0 {
- Ok(unscaled.ilog(base as u128) as f64)
- } else {
- Ok(f64::NAN)
- };
+ let base_u128 = base as u128;
+ let int_log = unscaled.ilog(base_u128);
+ if base_u128.checked_pow(int_log) == Some(unscaled) {
+ return Ok(int_log as f64);
+ }
}
decimal_to_f64(value, scale).map(|v| v.log(base))
}
-/// Unscale a Decimal32 value to u32.
-#[inline]
-fn unscale_to_u32(value: i32, scale: i8) -> Option<u32> {
- let value_u32 = u32::try_from(value).ok()?;
- let divisor = 10u32.checked_pow(scale as u32)?;
- Some(value_u32 / divisor)
-}
-
-/// Unscale a Decimal64 value to u64.
-#[inline]
-fn unscale_to_u64(value: i64, scale: i8) -> Option<u64> {
- let value_u64 = u64::try_from(value).ok()?;
- let divisor = 10u64.checked_pow(scale as u32)?;
- Some(value_u64 / divisor)
-}
-
-/// Unscale a Decimal128 value to u128.
-#[inline]
-fn unscale_to_u128(value: i128, scale: i8) -> Option<u128> {
- let value_u128 = u128::try_from(value).ok()?;
- let divisor = 10u128.checked_pow(scale as u32)?;
- Some(value_u128 / divisor)
-}
-
/// Convert a scaled decimal value to f64.
#[inline]
fn decimal_to_f64<T: ToPrimitive + Copy>(value: T, scale: i8) -> Result<f64,
ArrowError> {
@@ -444,13 +423,9 @@ mod tests {
#[test]
fn test_log_decimal_native() {
let value = 10_i128.pow(35);
- assert_eq!((value as f64).log2(), 116.26748332105768);
- assert_eq!(
- log_decimal128(value, 0, 2.0).unwrap(),
- // TODO: see we're losing our decimal points compared to above
- // https://github.com/apache/datafusion/issues/18524
- 116.0
- );
+ let expected = (value as f64).log2();
+ let actual = log_decimal128(value, 0, 2.0).unwrap();
+ assert!((actual - expected).abs() < 1e-10);
}
#[test]
@@ -1012,7 +987,8 @@ mod tests {
assert!((floats.value(1) - 2.0).abs() < 1e-10);
assert!((floats.value(2) - 3.0).abs() < 1e-10);
assert!((floats.value(3) - 4.0).abs() < 1e-10);
- assert!((floats.value(4) - 4.0).abs() < 1e-10); // Integer
rounding
+ let expected = 12600_f64.log(10.0);
+ assert!((floats.value(4) - expected).abs() < 1e-10);
assert!(floats.value(5).is_nan());
}
ColumnarValue::Scalar(_) => {
@@ -1142,13 +1118,14 @@ mod tests {
.expect("failed to convert result to a Float64Array");
assert_eq!(floats.len(), 7);
- eprintln!("floats {:?}", &floats);
assert!((floats.value(0) - 1.0).abs() < 1e-10);
assert!((floats.value(1) - 2.0).abs() < 1e-10);
assert!((floats.value(2) - 3.0).abs() < 1e-10);
assert!((floats.value(3) - 4.0).abs() < 1e-10);
- assert!((floats.value(4) - 4.0).abs() < 1e-10); // Integer
rounding for float log
- assert!((floats.value(5) - 38.0).abs() < 1e-10);
+ let expected = 12600_f64.log(10.0);
+ assert!((floats.value(4) - expected).abs() < 1e-10);
+ let expected = ((i128::MAX - 1000) as f64).log(10.0);
+ assert!((floats.value(5) - expected).abs() < 1e-10);
assert!(floats.value(6).is_nan());
}
ColumnarValue::Scalar(_) => {
diff --git a/datafusion/sqllogictest/test_files/decimal.slt
b/datafusion/sqllogictest/test_files/decimal.slt
index eca2c88bb5..5485a5fd30 100644
--- a/datafusion/sqllogictest/test_files/decimal.slt
+++ b/datafusion/sqllogictest/test_files/decimal.slt
@@ -804,7 +804,7 @@ select log(arrow_cast(100, 'Decimal32(9, 2)'));
query R
select log(2.0, arrow_cast(12345.67, 'Decimal32(9, 2)'));
----
-13
+13.591717513272
# log for small decimal64
query R
@@ -820,7 +820,7 @@ select log(arrow_cast(100, 'Decimal64(18, 2)'));
query R
select log(2.0, arrow_cast(12345.6789, 'Decimal64(15, 4)'));
----
-13
+13.591718553311
# log for small decimal128
@@ -896,15 +896,13 @@ select log(10::decimal(38, 0),
100000000000000000000000000000000000::decimal(38,
query R
select log(2, 100000000000000000000000000000000000::decimal(38,0));
----
-116
+116.267483321058
# log(10^35) for decimal128 with another base
-# TODO: this should be 116.267483321058, error with native decimal log impl
-# https://github.com/apache/datafusion/issues/18524
query R
select log(2.0, 100000000000000000000000000000000000::decimal(38,0));
----
-116
+116.267483321058
# log with non-integer base (fallback to f64)
query R
@@ -1036,7 +1034,7 @@ from (values (10.0), (2.0), (3.0)) as t(base);
query R
SELECT log(10, arrow_cast(0.5, 'Decimal32(5, 1)'))
----
-NaN
+-0.301029995664
query R
SELECT log(10, arrow_cast(1 , 'Decimal32(5, 1)'))
@@ -1195,18 +1193,18 @@ select
100000000000000000000000000000000000::decimal(38,0)
99999999999999996863366107917975552
# log(10^35) for decimal128 with explicit decimal base
-# Float parsing is rounding down
+# Float parsing is rounding down, but log uses float computation so result
rounds to 35
query R
select log(10, 100000000000000000000000000000000000::decimal(38,0));
----
-34
+35
# log(10^35) for large decimal128 if parsed as float
-# Float parsing is rounding down
+# Float parsing is rounding down, but log uses float computation so result
rounds to 35
query R
select log(100000000000000000000000000000000000::decimal(38,0))
----
-34
+35
# Result is decimal since argument is decimal regardless decimals-as-floats
parsing
query R
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]