This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 338459608a GH-48673: [C++] Fix ToStringWithoutContextLines to check 
for :\d+ pattern before removing lines (#48674)
338459608a is described below

commit 338459608a78742af81ae444bc8053f0a2e4cdb4
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Thu Jan 29 20:24:03 2026 +0900

    GH-48673: [C++] Fix ToStringWithoutContextLines to check for :\d+ pattern 
before removing lines (#48674)
    
    ### Rationale for this change
    
    This PR proposes to fix the todo 
https://github.com/apache/arrow/blob/7ebc88c8fae62ed97bc30865c845c8061132af7e/cpp/src/arrow/status.cc#L131-L134
 which would allows a better parsing for line numbers.
    
    I could not find the relevant example to demonstrate within this project 
but assume that we have a test such as:
    
    (Generated by ChatGPT)
    
    ```cpp
    TEST(BlockParser, ErrorMessageWithColonsPreserved) {
      Status st(StatusCode::Invalid,
                "CSV parse error: Row #2: Expected 2 columns, got 3: 
12:34:56,key:value,data\n"
                "Error details: Time format: 12:34:56, Key: value\n"
                "parser_test.cc:940  Parse(parser, csv, &out_size)");
    
      std::string expected_msg =
          "Invalid: CSV parse error: Row #2: Expected 2 columns, got 3: 
12:34:56,key:value,data\n"
          "Error details: Time format: 12:34:56, Key: value";
    
      ASSERT_RAISES_WITH_MESSAGE(Invalid, expected_msg, st);
    }
    
    // Test with URL-like data (another common case with colons)
    TEST(BlockParser, ErrorMessageWithURLPreserved) {
      Status st(StatusCode::Invalid,
                "CSV parse error: Row #2: Expected 1 columns, got 2: 
http://arrow.apache.org:8080/api,data\n";
                "URL: http://arrow.apache.org:8080/api\n";
                "parser_test.cc:974  Parse(parser, csv, &out_size)");
    
      std::string expected_msg =
          "Invalid: CSV parse error: Row #2: Expected 1 columns, got 2: 
http://arrow.apache.org:8080/api,data\n";
          "URL: http://arrow.apache.org:8080/api";;
    
      ASSERT_RAISES_WITH_MESSAGE(Invalid, expected_msg, st);
    }
    ```
    
    then it fails.
    
    ### What changes are included in this PR?
    
    Fixed `Status::ToStringWithoutContextLines()` to only remove context lines 
matching the `filename:line` pattern (`:\d+`), preventing legitimate error 
messages containing colons from being incorrectly stripped.
    
    ### Are these changes tested?
    
    Manually tested, and unittests were added, with `cmake .. --preset 
ninja-debug -DARROW_EXTRA_ERROR_CONTEXT=ON`.
    
    ### Are there any user-facing changes?
    
    No, test-only.
    
    * GitHub Issue: #48673
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 cpp/src/arrow/status.cc      | 22 ++++++++++++++++++++--
 cpp/src/arrow/status_test.cc | 17 +++++++++++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/status.cc b/cpp/src/arrow/status.cc
index 55ce3fb78d..4730bca8c6 100644
--- a/cpp/src/arrow/status.cc
+++ b/cpp/src/arrow/status.cc
@@ -13,6 +13,7 @@
 #include "arrow/status.h"
 
 #include <cassert>
+#include <cctype>
 #include <cstdlib>
 #include <iostream>
 #ifdef ARROW_EXTRA_ERROR_CONTEXT
@@ -131,8 +132,25 @@ std::string Status::ToStringWithoutContextLines() const {
     if (last_new_line_position == std::string::npos) {
       break;
     }
-    // TODO: We may want to check /:\d+ /
-    if (message.find(":", last_new_line_position) == std::string::npos) {
+    // Check for the pattern ":\d+ " (colon followed by one or more digits and 
a space)
+    // to identify context lines in the format "filename:line  expr"
+    auto colon_position = message.find(":", last_new_line_position);
+    if (colon_position == std::string::npos) {
+      break;
+    }
+    // Verify that the colon is followed by one or more digits and then a space
+    size_t pos = colon_position + 1;
+    if (pos >= message.size() ||
+        !std::isdigit(static_cast<unsigned char>(message[pos]))) {
+      break;
+    }
+    // Skip all digits
+    while (pos < message.size() &&
+           std::isdigit(static_cast<unsigned char>(message[pos]))) {
+      pos++;
+    }
+    // Check if followed by a space
+    if (pos >= message.size() || message[pos] != ' ') {
       break;
     }
     message = message.substr(0, last_new_line_position);
diff --git a/cpp/src/arrow/status_test.cc b/cpp/src/arrow/status_test.cc
index 39a52bd2ba..72998cba78 100644
--- a/cpp/src/arrow/status_test.cc
+++ b/cpp/src/arrow/status_test.cc
@@ -342,4 +342,21 @@ TEST(StatusTest, ReturnIfNotOk) {
   ASSERT_EQ(StripContext(st.message()), "StatusLike: 43");
 }
 
+#ifdef ARROW_EXTRA_ERROR_CONTEXT
+TEST(StatusTest, ToStringWithoutContextLines) {
+  Status status = Status::IOError("base error");
+  status.AddContextLine("file1.cc", 42, "expr");
+  status.AddContextLine("file2.cc", 100, "expr");
+
+  ASSERT_EQ(status.ToStringWithoutContextLines(), "IOError: base error");
+
+  Status status2(StatusCode::Invalid,
+                 "Error message\nThis line has: a colon but no digits");
+  status2.AddContextLine("file.cc", 20, "expr");
+
+  ASSERT_EQ(status2.ToStringWithoutContextLines(),
+            "Invalid: Error message\nThis line has: a colon but no digits");
+}
+#endif
+
 }  // namespace arrow

Reply via email to