Copilot commented on code in PR #49462:
URL: https://github.com/apache/arrow/pull/49462#discussion_r3008801947


##########
cpp/src/arrow/json/reader_test.cc:
##########
@@ -290,6 +290,29 @@ TEST(ReaderTest, MultipleChunksParallel) {
   AssertTablesEqual(*serial, *threaded);
 }
 
+// Regression test for intermittent threading crashes on MinGW.
+// Run this test multiple times manually to stress-test:
+//   while build/debug/arrow-json-test \
+//       --gtest_filter=ReaderTest.MultipleChunksParallelRegression; do :; done
+// See https://github.com/apache/arrow/issues/49272
+TEST(ReaderTest, MultipleChunksParallelRegression) {
+  int64_t count = 1 << 10;
+  ParseOptions parse_options;
+  parse_options.unexpected_field_behavior = UnexpectedFieldBehavior::InferType;
+  ReadOptions read_options;
+  read_options.block_size = static_cast<int>(count / 2);
+  read_options.use_threads = true;
+
+  std::string json;
+  for (int i = 0; i < count; ++i) {
+    json += "{\"a\":" + std::to_string(i) + "}\n";
+  }
+
+  ASSERT_OK_AND_ASSIGN(auto table,
+                       ReadToTable(std::move(json), read_options, 
parse_options));

Review Comment:
   `MultipleChunksParallelRegression` duplicates setup and JSON construction 
logic from `MultipleChunksParallel`. Consider refactoring to reuse the existing 
helper/logic (or wrapping the existing test body in a loop) to avoid divergence 
and reduce maintenance burden if the test input needs to change.
   ```suggestion
   
   // Helper used by multiple-chunk parallel tests to build the JSON input,
   // configure read options and read the resulting table.
   static Result<std::shared_ptr<Table>> ReadMultipleChunksParallelTable(
       int64_t count, bool use_threads, const ParseOptions& parse_options) {
     ReadOptions read_options;
     read_options.block_size = static_cast<int>(count / 2);
     read_options.use_threads = use_threads;
   
     std::string json;
     json.reserve(static_cast<size_t>(count) * 16);  // rough reserve to avoid 
reallocations
     for (int64_t i = 0; i < count; ++i) {
       json += "{\"a\":" + std::to_string(i) + "}\n";
     }
   
     return ReadToTable(std::move(json), read_options, parse_options);
   }
   
   TEST(ReaderTest, MultipleChunksParallelRegression) {
     int64_t count = 1 << 10;
     ParseOptions parse_options;
     parse_options.unexpected_field_behavior = 
UnexpectedFieldBehavior::InferType;
   
     ASSERT_OK_AND_ASSIGN(auto table, ReadMultipleChunksParallelTable(
                                          count, /*use_threads=*/true, 
parse_options));
   ```



##########
cpp/src/arrow/util/thread_pool.cc:
##########
@@ -630,9 +631,48 @@ void ThreadPool::CollectFinishedWorkersUnlocked() {
   state_->finished_workers_.clear();
 }
 
+// MinGW's __emutls implementation for C++ thread_local has known race 
conditions
+// during thread creation that can cause segfaults. Use native Win32 TLS 
instead.
+// See https://github.com/apache/arrow/issues/49272
+#  ifdef __MINGW32__
+
+namespace {
+DWORD GetPoolTlsIndex() {
+  static DWORD index = [] {
+    DWORD i = TlsAlloc();
+    if (i == TLS_OUT_OF_INDEXES) {
+      ARROW_LOG(FATAL) << "TlsAlloc failed for thread pool TLS: "
+                       << WinErrorMessage(GetLastError());
+    }
+    return i;
+  }();
+  return index;
+}
+}  // namespace
+
+static ThreadPool* GetCurrentThreadPool() {
+  // TlsGetValue() clears the calling thread's last-error state on success.
+  DWORD last_error = GetLastError();
+  auto* pool = static_cast<ThreadPool*>(TlsGetValue(GetPoolTlsIndex()));
+  SetLastError(last_error);

Review Comment:
   On MinGW, `GetCurrentThreadPool()` uses `TlsGetValue()` but doesn’t 
distinguish between “no value set” and “TLS lookup failed” (both return 
`NULL`). Since the function also restores the caller’s last-error value, a real 
TLS failure would be silently ignored and could make `OwnsThisThread()` return 
false unexpectedly. Consider temporarily setting last-error to 0 before 
`TlsGetValue()`, checking `GetLastError()` after the call, and only then 
restoring the original last-error (and/or logging/failing on TLS errors).
   ```suggestion
     // Preserve the caller's last-error value while also detecting TLS 
failures.
     DWORD original_error = GetLastError();
     // Ensure a successful TlsGetValue() leaves GetLastError() == 0.
     SetLastError(0);
     auto* pool = static_cast<ThreadPool*>(TlsGetValue(GetPoolTlsIndex()));
     DWORD tls_error = GetLastError();
     if (tls_error != 0) {
       // Restore the original error before logging a fatal TLS failure.
       SetLastError(original_error);
       ARROW_LOG(FATAL) << "TlsGetValue failed for thread pool TLS: "
                        << WinErrorMessage(tls_error);
     }
     // No TLS error: restore the caller's last-error value and return the pool.
     SetLastError(original_error);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to