This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new c0d5a596f3 GH-49003: [C++] Don't consider `out_of_range` an error in
float parsing (#49095)
c0d5a596f3 is described below
commit c0d5a596f300e25e194343af3766f4ca0e746300
Author: Álvaro Kothe <[email protected]>
AuthorDate: Tue Feb 3 14:59:44 2026 -0300
GH-49003: [C++] Don't consider `out_of_range` an error in float parsing
(#49095)
### Rationale for this change
This PR restores the pre-version-23 behavior of floating-point parsing for
inputs that overflow or that underflow below the subnormal range.
In version `3.10.1`, `fast_float` did not set an error code in these cases:
it assigned `±Inf` on overflow and `0.0` on underflow. With the update to
version `8.1`, it started to report `std::errc::result_out_of_range` in such
cases, which Arrow's parsing code treated as a conversion failure.
### What changes are included in this PR?
Ignores `std::errc::result_out_of_range` and produces `±Inf` / `0.0` as
appropriate instead of failing the conversion.
### Are these changes tested?
Yes. Added tests for overflow with both positive and negative mantissas,
and for underflow to zero, each of them for binary{16,32,64}.
### Are there any user-facing changes?
Yes, this is a user-facing change. With `libarrow==23`, the CSV reader was
reading such values as strings, whereas earlier versions parsed them as `0`
or `±inf`.
With this patch, the CSV reader in PyArrow outputs:
```python
>>> import pyarrow
>>> import pyarrow.csv
>>> import io
>>> table = pyarrow.csv.read_csv(io.BytesIO(f"data\n10E-617\n10E617\n-10E617".encode()))
>>> print(table)
pyarrow.Table
data: double
----
data: [[0,inf,-inf]]
```
Closes #49003
* GitHub Issue: #49003
Authored-by: Alvaro-Kothe <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/util/value_parsing.cc | 15 ++++++++++++---
cpp/src/arrow/util/value_parsing_test.cc | 12 ++++++++++++
2 files changed, 24 insertions(+), 3 deletions(-)
diff --git a/cpp/src/arrow/util/value_parsing.cc
b/cpp/src/arrow/util/value_parsing.cc
index 1a8e8066d7..0cc71f276d 100644
--- a/cpp/src/arrow/util/value_parsing.cc
+++ b/cpp/src/arrow/util/value_parsing.cc
@@ -35,7 +35,10 @@ bool StringToFloat(const char* s, size_t length, char
decimal_point, float* out)
::arrow_vendored::fast_float::chars_format::general, decimal_point};
const auto res =
::arrow_vendored::fast_float::from_chars_advanced(s, s + length, *out,
options);
- return res.ec == std::errc() && res.ptr == s + length;
+ const bool is_valid_number =
+ res.ec == std::errc() || res.ec == std::errc::result_out_of_range;
+ const bool consumed_entire_string = res.ptr == s + length;
+ return is_valid_number && consumed_entire_string;
}
bool StringToFloat(const char* s, size_t length, char decimal_point, double*
out) {
@@ -43,7 +46,10 @@ bool StringToFloat(const char* s, size_t length, char
decimal_point, double* out
::arrow_vendored::fast_float::chars_format::general, decimal_point};
const auto res =
::arrow_vendored::fast_float::from_chars_advanced(s, s + length, *out,
options);
- return res.ec == std::errc() && res.ptr == s + length;
+ const bool is_valid_number =
+ res.ec == std::errc() || res.ec == std::errc::result_out_of_range;
+ const bool consumed_entire_string = res.ptr == s + length;
+ return is_valid_number && consumed_entire_string;
}
// Half float
@@ -53,7 +59,10 @@ bool StringToFloat(const char* s, size_t length, char
decimal_point, Float16* ou
float temp_out;
const auto res =
::arrow_vendored::fast_float::from_chars_advanced(s, s + length,
temp_out, options);
- const bool ok = res.ec == std::errc() && res.ptr == s + length;
+ const bool is_valid_number =
+ res.ec == std::errc() || res.ec == std::errc::result_out_of_range;
+ const bool consumed_entire_string = res.ptr == s + length;
+ const bool ok = is_valid_number && consumed_entire_string;
if (ok) {
*out = Float16::FromFloat(temp_out);
}
diff --git a/cpp/src/arrow/util/value_parsing_test.cc
b/cpp/src/arrow/util/value_parsing_test.cc
index b9e3b18444..b61f777685 100644
--- a/cpp/src/arrow/util/value_parsing_test.cc
+++ b/cpp/src/arrow/util/value_parsing_test.cc
@@ -141,6 +141,10 @@ TEST(StringConversion, ToFloat) {
AssertConversion<FloatType>("0", 0.0f);
AssertConversion<FloatType>("-0.0", -0.0f);
AssertConversion<FloatType>("-1e20", -1e20f);
+ AssertConversion<FloatType>("4e38", std::numeric_limits<float>::infinity());
+ AssertConversion<FloatType>("-4e38",
-std::numeric_limits<float>::infinity());
+ AssertConversion<FloatType>("1e-46", 0.0f);
+ AssertConversion<FloatType>("-1e-46", -0.0f);
AssertConversion<FloatType>("+Infinity",
std::numeric_limits<float>::infinity());
AssertConversion<FloatType>("-Infinity",
-std::numeric_limits<float>::infinity());
AssertConversion<FloatType>("Infinity",
std::numeric_limits<float>::infinity());
@@ -166,6 +170,10 @@ TEST(StringConversion, ToDouble) {
AssertConversion<DoubleType>("0", 0);
AssertConversion<DoubleType>("-0.0", -0.0);
AssertConversion<DoubleType>("-1e100", -1e100);
+ AssertConversion<DoubleType>("2e308",
std::numeric_limits<double>::infinity());
+ AssertConversion<DoubleType>("-2e308",
-std::numeric_limits<double>::infinity());
+ AssertConversion<DoubleType>("1e-325", 0.0);
+ AssertConversion<DoubleType>("-1e-325", -0.0);
AssertConversion<DoubleType>("+Infinity",
std::numeric_limits<double>::infinity());
AssertConversion<DoubleType>("-Infinity",
-std::numeric_limits<double>::infinity());
AssertConversion<DoubleType>("Infinity",
std::numeric_limits<double>::infinity());
@@ -185,6 +193,10 @@ TEST(StringConversion, ToHalfFloat) {
AssertConversion<HalfFloatType>("0", Float16(0.0f));
AssertConversion<HalfFloatType>("-0.0", Float16(-0.0f));
AssertConversion<HalfFloatType>("-1e15", Float16(-1e15));
+ AssertConversion<HalfFloatType>("7e4", Float16::FromBits(0x7c00));
+ AssertConversion<HalfFloatType>("-7e4", Float16::FromBits(0xfc00));
+ AssertConversion<HalfFloatType>("1e-9", Float16(0.0f));
+ AssertConversion<HalfFloatType>("-1e-9", Float16(-0.0f));
AssertConversion<HalfFloatType>("+Infinity", Float16::FromBits(0x7c00));
AssertConversion<HalfFloatType>("-Infinity", Float16::FromBits(0xfc00));
AssertConversion<HalfFloatType>("Infinity", Float16::FromBits(0x7c00));