This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 85c18a0965 GH-49034 [C++][Gandiva] Fix binary_string to not trigger 
error for null strings (#49035)
85c18a0965 is described below

commit 85c18a0965005a0f36c1be08f96a2f5d9f63db6d
Author: Logan Riggs <[email protected]>
AuthorDate: Fri Jan 30 00:42:15 2026 -0800

    GH-49034 [C++][Gandiva] Fix binary_string to not trigger error for null 
strings (#49035)
    
    ### Rationale for this change
    
    The binary_string function will attempt to allocate 0 bytes of memory, 
which results in a null ptr being returned and the function interprets that as 
an error.
    
    ### What changes are included in this PR?
    Add kCanReturnErrors to the function definition to match other string 
functions.
    Move the check for 0 byte length input earlier in the binary_string 
function to prevent the 0 allocation.
    Add a unit test.
    
    ### Are these changes tested?
    Yes, unit and integration testing.
    
    ### Are there any user-facing changes?
    No.
    
    * GitHub Issue: #49034
    
    Authored-by: Logan Riggs <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 cpp/src/gandiva/function_registry_string.cc    |  3 ++-
 cpp/src/gandiva/precompiled/string_ops.cc      | 10 +++++-----
 cpp/src/gandiva/precompiled/string_ops_test.cc | 20 ++++++++++++++++----
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/cpp/src/gandiva/function_registry_string.cc 
b/cpp/src/gandiva/function_registry_string.cc
index 2bc6936d77..be57ce4f47 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -432,7 +432,8 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      NativeFunction::kNeedsContext | 
NativeFunction::kCanReturnErrors),
 
       NativeFunction("binary_string", {}, DataTypeVector{utf8()}, binary(),
-                     kResultNullIfNull, "binary_string", 
NativeFunction::kNeedsContext),
+                     kResultNullIfNull, "binary_string",
+                     NativeFunction::kNeedsContext | 
NativeFunction::kCanReturnErrors),
 
       NativeFunction("left", {}, DataTypeVector{utf8(), int32()}, utf8(),
                      kResultNullIfNull, "left_utf8_int32", 
NativeFunction::kNeedsContext),
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc 
b/cpp/src/gandiva/precompiled/string_ops.cc
index 0b31c769c9..7450018a55 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -2252,6 +2252,11 @@ const char* right_utf8_int32(gdv_int64 context, const 
char* text, gdv_int32 text
 FORCE_INLINE
 const char* binary_string(gdv_int64 context, const char* text, gdv_int32 
text_len,
                           gdv_int32* out_len) {
+  if (text_len == 0) {
+    *out_len = 0;
+    return "";
+  }
+
   gdv_binary ret =
       reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, 
text_len));
 
@@ -2261,11 +2266,6 @@ const char* binary_string(gdv_int64 context, const char* 
text, gdv_int32 text_le
     return "";
   }
 
-  if (text_len == 0) {
-    *out_len = 0;
-    return "";
-  }
-
   // converting hex encoded string to normal string
   int j = 0;
   for (int i = 0; i < text_len; i++, j++) {
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc 
b/cpp/src/gandiva/precompiled/string_ops_test.cc
index 9d0a4d71af..ca2b2b5785 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -1883,10 +1883,6 @@ TEST(TestStringOps, TestBinaryString) {
   std::string output = std::string(out_str, out_len);
   EXPECT_EQ(output, "TestString");
 
-  out_str = binary_string(ctx_ptr, "", 0, &out_len);
-  output = std::string(out_str, out_len);
-  EXPECT_EQ(output, "");
-
   out_str = binary_string(ctx_ptr, "T", 1, &out_len);
   output = std::string(out_str, out_len);
   EXPECT_EQ(output, "T");
@@ -1912,6 +1908,22 @@ TEST(TestStringOps, TestBinaryString) {
   EXPECT_EQ(output, "OM");
 }
 
+TEST(TestStringOps, TestBinaryStringNull) {
+  // This test is only valid if it is the first to trigger a memory allocation 
in the
+  // context.
+  gandiva::ExecutionContext ctx;
+  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
+  gdv_int32 out_len = 0;
+  const char* out_str;
+
+  std::string output;
+
+  out_str = binary_string(ctx_ptr, "", 0, &out_len);
+  ASSERT_FALSE(ctx.has_error());
+  output = std::string(out_str, out_len);
+  EXPECT_EQ(output, "");
+}
+
 TEST(TestStringOps, TestSplitPart) {
   gandiva::ExecutionContext ctx;
   uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);

Reply via email to