This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 85c18a0965 GH-49034 [C++][Gandiva] Fix binary_string to not trigger
error for null strings (#49035)
85c18a0965 is described below
commit 85c18a0965005a0f36c1be08f96a2f5d9f63db6d
Author: Logan Riggs <[email protected]>
AuthorDate: Fri Jan 30 00:42:15 2026 -0800
GH-49034 [C++][Gandiva] Fix binary_string to not trigger error for null
strings (#49035)
### Rationale for this change
The binary_string function will attempt to allocate 0 bytes of memory,
which results in a null ptr being returned and the function interprets that as
an error.
### What changes are included in this PR?
Add kCanReturnErrors to the function definition to match other string
functions.
Move the check for 0 byte length input earlier in the binary_string
function to prevent the 0 allocation.
Add a unit test.
### Are these changes tested?
Yes, unit and integration testing.
### Are there any user-facing changes?
No.
* GitHub Issue: #49034
Authored-by: Logan Riggs <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/gandiva/function_registry_string.cc | 3 ++-
cpp/src/gandiva/precompiled/string_ops.cc | 10 +++++-----
cpp/src/gandiva/precompiled/string_ops_test.cc | 20 ++++++++++++++++----
3 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/cpp/src/gandiva/function_registry_string.cc
b/cpp/src/gandiva/function_registry_string.cc
index 2bc6936d77..be57ce4f47 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -432,7 +432,8 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
NativeFunction::kNeedsContext |
NativeFunction::kCanReturnErrors),
NativeFunction("binary_string", {}, DataTypeVector{utf8()}, binary(),
- kResultNullIfNull, "binary_string",
NativeFunction::kNeedsContext),
+ kResultNullIfNull, "binary_string",
+ NativeFunction::kNeedsContext |
NativeFunction::kCanReturnErrors),
NativeFunction("left", {}, DataTypeVector{utf8(), int32()}, utf8(),
kResultNullIfNull, "left_utf8_int32",
NativeFunction::kNeedsContext),
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc
b/cpp/src/gandiva/precompiled/string_ops.cc
index 0b31c769c9..7450018a55 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -2252,6 +2252,11 @@ const char* right_utf8_int32(gdv_int64 context, const
char* text, gdv_int32 text
FORCE_INLINE
const char* binary_string(gdv_int64 context, const char* text, gdv_int32
text_len,
gdv_int32* out_len) {
+ if (text_len == 0) {
+ *out_len = 0;
+ return "";
+ }
+
gdv_binary ret =
reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context,
text_len));
@@ -2261,11 +2266,6 @@ const char* binary_string(gdv_int64 context, const char*
text, gdv_int32 text_le
return "";
}
- if (text_len == 0) {
- *out_len = 0;
- return "";
- }
-
// converting hex encoded string to normal string
int j = 0;
for (int i = 0; i < text_len; i++, j++) {
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc
b/cpp/src/gandiva/precompiled/string_ops_test.cc
index 9d0a4d71af..ca2b2b5785 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -1883,10 +1883,6 @@ TEST(TestStringOps, TestBinaryString) {
std::string output = std::string(out_str, out_len);
EXPECT_EQ(output, "TestString");
- out_str = binary_string(ctx_ptr, "", 0, &out_len);
- output = std::string(out_str, out_len);
- EXPECT_EQ(output, "");
-
out_str = binary_string(ctx_ptr, "T", 1, &out_len);
output = std::string(out_str, out_len);
EXPECT_EQ(output, "T");
@@ -1912,6 +1908,22 @@ TEST(TestStringOps, TestBinaryString) {
EXPECT_EQ(output, "OM");
}
+TEST(TestStringOps, TestBinaryStringNull) {
+ // This test is only valid if it is the first to trigger a memory allocation
in the
+ // context.
+ gandiva::ExecutionContext ctx;
+ uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+ gdv_int32 out_len = 0;
+ const char* out_str;
+
+ std::string output;
+
+ out_str = binary_string(ctx_ptr, "", 0, &out_len);
+ ASSERT_FALSE(ctx.has_error());
+ output = std::string(out_str, out_len);
+ EXPECT_EQ(output, "");
+}
+
TEST(TestStringOps, TestSplitPart) {
gandiva::ExecutionContext ctx;
uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);