This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 35717a7aef GH-48592: [C++] Use starts_with/ends_with methods (#48614)
35717a7aef is described below

commit 35717a7aef295738b59d3c4a1f31d6bce4f81f8f
Author: Jonah Kelman <[email protected]>
AuthorDate: Mon Jan 5 13:14:50 2026 -0500

    GH-48592: [C++] Use starts_with/ends_with methods (#48614)
    
    ### Rationale for this change
    
    The code previously used the StartsWith and EndsWith utility functions. C++20 
provides built-in starts_with and ends_with member functions on std::string and 
std::string_view, rendering StartsWith and EndsWith unnecessary.
    
    ### What changes are included in this PR?
    
    All calls of StartsWith and EndsWith are replaced with starts_with and 
ends_with, respectively. The definition and tests for StartsWith and EndsWith 
have been deleted.
    
    ### Are these changes tested?
    
    Yes, all unit tests pass.
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #48592
    
    Authored-by: Jonah Kelman <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/acero/tpch_node_test.cc              |  4 +--
 cpp/src/arrow/compute/expression.cc                |  3 +-
 .../arrow/compute/kernels/scalar_string_ascii.cc   |  7 ++---
 cpp/src/arrow/dataset/discovery.cc                 |  4 +--
 cpp/src/arrow/dataset/subtree_test.cc              |  4 +--
 .../arrow/engine/simple_extension_type_internal.h  |  2 +-
 .../arrow/engine/substrait/relation_internal.cc    |  1 -
 cpp/src/arrow/filesystem/azurefs.cc                |  4 +--
 cpp/src/arrow/filesystem/azurefs_test.cc           |  3 +-
 cpp/src/arrow/filesystem/path_util.cc              |  6 ++--
 cpp/src/arrow/filesystem/s3fs.cc                   |  3 +-
 cpp/src/arrow/flight/transport/grpc/grpc_client.cc | 22 +++++++--------
 cpp/src/arrow/flight/types.cc                      |  2 +-
 cpp/src/arrow/json/chunker_test.cc                 |  6 ++--
 cpp/src/arrow/util/reflection_test.cc              |  2 +-
 cpp/src/arrow/util/string.h                        | 10 -------
 cpp/src/arrow/util/string_test.cc                  | 32 ++--------------------
 cpp/src/parquet/arrow/fuzz_internal.cc             |  2 +-
 cpp/src/parquet/arrow/schema.cc                    |  1 -
 cpp/src/parquet/geospatial/util_json_internal.cc   |  4 +--
 20 files changed, 33 insertions(+), 89 deletions(-)

diff --git a/cpp/src/arrow/acero/tpch_node_test.cc 
b/cpp/src/arrow/acero/tpch_node_test.cc
index f484d6c9d5..6321a8ca02 100644
--- a/cpp/src/arrow/acero/tpch_node_test.cc
+++ b/cpp/src/arrow/acero/tpch_node_test.cc
@@ -38,8 +38,6 @@
 
 namespace arrow {
 
-using arrow::internal::StartsWith;
-
 namespace acero {
 namespace internal {
 
@@ -100,7 +98,7 @@ void VerifyUniqueKey(std::unordered_set<int32_t>* seen, 
const Datum& d, int32_t
 void VerifyStringAndNumber_Single(std::string_view row, std::string_view 
prefix,
                                   const int64_t i, const int32_t* nums,
                                   bool verify_padding) {
-  ASSERT_TRUE(StartsWith(row, prefix)) << row << ", prefix=" << prefix << ", 
i=" << i;
+  ASSERT_TRUE(row.starts_with(prefix)) << row << ", prefix=" << prefix << ", 
i=" << i;
   const char* num_str = row.data() + prefix.size();
   const char* num_str_end = row.data() + row.size();
   int64_t num = 0;
diff --git a/cpp/src/arrow/compute/expression.cc 
b/cpp/src/arrow/compute/expression.cc
index 2563674a59..93427f75fe 100644
--- a/cpp/src/arrow/compute/expression.cc
+++ b/cpp/src/arrow/compute/expression.cc
@@ -47,7 +47,6 @@ namespace arrow {
 
 using internal::checked_cast;
 using internal::checked_pointer_cast;
-using internal::EndsWith;
 using internal::ToChars;
 
 namespace compute {
@@ -180,7 +179,7 @@ std::string Expression::ToString() const {
   }
 
   constexpr std::string_view kleene = "_kleene";
-  if (EndsWith(call->function_name, kleene)) {
+  if (call->function_name.ends_with(kleene)) {
     auto op = call->function_name.substr(0, call->function_name.size() - 
kleene.size());
     return binary(std::move(op));
   }
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc 
b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc
index dd5abed16c..d4dda5556b 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc
@@ -38,9 +38,6 @@
 
 namespace arrow {
 
-using internal::EndsWith;
-using internal::StartsWith;
-
 namespace compute {
 namespace internal {
 
@@ -1291,7 +1288,7 @@ struct PlainStartsWithMatcher {
   }
 
   bool Match(std::string_view current) const {
-    return StartsWith(current, options_.pattern);
+    return current.starts_with(options_.pattern);
   }
 };
 
@@ -1309,7 +1306,7 @@ struct PlainEndsWithMatcher {
   }
 
   bool Match(std::string_view current) const {
-    return EndsWith(current, options_.pattern);
+    return current.ends_with(options_.pattern);
   }
 };
 
diff --git a/cpp/src/arrow/dataset/discovery.cc 
b/cpp/src/arrow/dataset/discovery.cc
index 5686e50e3c..c6f6e41d1e 100644
--- a/cpp/src/arrow/dataset/discovery.cc
+++ b/cpp/src/arrow/dataset/discovery.cc
@@ -35,8 +35,6 @@
 
 namespace arrow {
 
-using internal::StartsWith;
-
 namespace dataset {
 
 namespace {
@@ -49,7 +47,7 @@ bool StartsWithAnyOf(const std::string& path, const 
std::vector<std::string>& pr
   auto parts = fs::internal::SplitAbstractPath(path);
   return std::any_of(parts.cbegin(), parts.cend(), [&](std::string_view part) {
     return std::any_of(prefixes.cbegin(), prefixes.cend(),
-                       [&](std::string_view prefix) { return StartsWith(part, 
prefix); });
+                       [&](std::string_view prefix) { return 
part.starts_with(prefix); });
   });
 }
 
diff --git a/cpp/src/arrow/dataset/subtree_test.cc 
b/cpp/src/arrow/dataset/subtree_test.cc
index fc13c20ece..51c7d47acb 100644
--- a/cpp/src/arrow/dataset/subtree_test.cc
+++ b/cpp/src/arrow/dataset/subtree_test.cc
@@ -31,8 +31,6 @@
 
 namespace arrow {
 
-using internal::StartsWith;
-
 using compute::field_ref;
 using compute::literal;
 
@@ -112,7 +110,7 @@ bool IsAncestorOf(std::string_view ancestor, 
std::string_view descendant) {
   ancestor = RemoveTrailingSlash(ancestor);
   if (ancestor == "") return true;
   descendant = RemoveTrailingSlash(descendant);
-  if (!StartsWith(descendant, ancestor)) return false;
+  if (!descendant.starts_with(ancestor)) return false;
   descendant.remove_prefix(ancestor.size());
   if (descendant.empty()) return true;
   return descendant.front() == '/';
diff --git a/cpp/src/arrow/engine/simple_extension_type_internal.h 
b/cpp/src/arrow/engine/simple_extension_type_internal.h
index 1867fe5045..5124b6d294 100644
--- a/cpp/src/arrow/engine/simple_extension_type_internal.h
+++ b/cpp/src/arrow/engine/simple_extension_type_internal.h
@@ -111,7 +111,7 @@ class SimpleExtensionType : public ExtensionType {
     void Fail() { params_ = std::nullopt; }
 
     void Init(std::string_view class_name, std::string_view repr, size_t 
num_properties) {
-      if (!::arrow::internal::StartsWith(repr, class_name)) return Fail();
+      if (!repr.starts_with(class_name)) return Fail();
 
       repr = repr.substr(class_name.size());
       if (repr.empty()) return Fail();
diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc 
b/cpp/src/arrow/engine/substrait/relation_internal.cc
index 1ea143f9c5..b9e663ed7b 100644
--- a/cpp/src/arrow/engine/substrait/relation_internal.cc
+++ b/cpp/src/arrow/engine/substrait/relation_internal.cc
@@ -65,7 +65,6 @@
 namespace arrow {
 
 using internal::checked_cast;
-using internal::StartsWith;
 using internal::ToChars;
 using util::UriFromAbsolutePath;
 
diff --git a/cpp/src/arrow/filesystem/azurefs.cc 
b/cpp/src/arrow/filesystem/azurefs.cc
index 0ca18eed51..a3a162616e 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -70,10 +70,10 @@ void AzureOptions::ExtractFromUriSchemeAndHierPart(const 
Uri& uri,
                                                    std::string* out_path) {
   const auto host = uri.host();
   std::string path;
-  if (arrow::internal::EndsWith(host, blob_storage_authority)) {
+  if (host.ends_with(blob_storage_authority)) {
     account_name = host.substr(0, host.size() - blob_storage_authority.size());
     path = internal::RemoveLeadingSlash(uri.path());
-  } else if (arrow::internal::EndsWith(host, dfs_storage_authority)) {
+  } else if (host.ends_with(dfs_storage_authority)) {
     account_name = host.substr(0, host.size() - dfs_storage_authority.size());
     path = internal::ConcatAbstractPath(uri.username(), uri.path());
   } else {
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc 
b/cpp/src/arrow/filesystem/azurefs_test.cc
index 43d1c2afb7..c3af6fb079 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -2856,8 +2856,7 @@ std::shared_ptr<const KeyValueMetadata> 
NormalizerKeyValueMetadata(
         value = "2023-10-31T08:15:20Z";
       }
     } else if (key == "ETag") {
-      if (arrow::internal::StartsWith(value, "\"") &&
-          arrow::internal::EndsWith(value, "\"")) {
+      if (value.starts_with("\"") && value.ends_with("\"")) {
         // Valid value
         value = "\"ETagValue\"";
       }
diff --git a/cpp/src/arrow/filesystem/path_util.cc 
b/cpp/src/arrow/filesystem/path_util.cc
index a48a34135a..dc82afd07e 100644
--- a/cpp/src/arrow/filesystem/path_util.cc
+++ b/cpp/src/arrow/filesystem/path_util.cc
@@ -29,8 +29,6 @@
 
 namespace arrow {
 
-using internal::StartsWith;
-
 namespace fs {
 namespace internal {
 
@@ -236,7 +234,7 @@ bool IsAncestorOf(std::string_view ancestor, 
std::string_view descendant) {
   }
 
   descendant = RemoveTrailingSlash(descendant);
-  if (!StartsWith(descendant, ancestor)) {
+  if (!descendant.starts_with(ancestor)) {
     // an ancestor path is a prefix of descendant paths
     return false;
   }
@@ -249,7 +247,7 @@ bool IsAncestorOf(std::string_view ancestor, 
std::string_view descendant) {
   }
 
   // "/hello/w" is not an ancestor of "/hello/world"
-  return StartsWith(descendant, std::string{kSep});
+  return descendant.starts_with(std::string{kSep});
 }
 
 std::optional<std::string_view> RemoveAncestor(std::string_view ancestor,
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index c6b821f5de..f75fd970a1 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -1405,9 +1405,10 @@ bool IsDirectory(std::string_view key, const 
S3Model::HeadObjectResult& result)
   }
   // Otherwise, if its content type starts with "application/x-directory",
   // it's a directory
-  if (::arrow::internal::StartsWith(result.GetContentType(), 
kAwsDirectoryContentType)) {
+  if (result.GetContentType().starts_with(kAwsDirectoryContentType)) {
     return true;
   }
+
   // Otherwise, it's a regular file.
   return false;
 }
diff --git a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc 
b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
index a2af830db7..737ec5fe91 100644
--- a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
+++ b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
@@ -62,8 +62,6 @@
 
 namespace arrow {
 
-using internal::EndsWith;
-
 namespace flight {
 namespace transport {
 namespace grpc {
@@ -176,25 +174,25 @@ class GrpcClientInterceptorAdapterFactory
 
     FlightMethod flight_method = FlightMethod::Invalid;
     std::string_view method(info->method());
-    if (EndsWith(method, "/Handshake")) {
+    if (method.ends_with("/Handshake")) {
       flight_method = FlightMethod::Handshake;
-    } else if (EndsWith(method, "/ListFlights")) {
+    } else if (method.ends_with("/ListFlights")) {
       flight_method = FlightMethod::ListFlights;
-    } else if (EndsWith(method, "/GetFlightInfo")) {
+    } else if (method.ends_with("/GetFlightInfo")) {
       flight_method = FlightMethod::GetFlightInfo;
-    } else if (EndsWith(method, "/PollFlightInfo")) {
+    } else if (method.ends_with("/PollFlightInfo")) {
       flight_method = FlightMethod::PollFlightInfo;
-    } else if (EndsWith(method, "/GetSchema")) {
+    } else if (method.ends_with("/GetSchema")) {
       flight_method = FlightMethod::GetSchema;
-    } else if (EndsWith(method, "/DoGet")) {
+    } else if (method.ends_with("/DoGet")) {
       flight_method = FlightMethod::DoGet;
-    } else if (EndsWith(method, "/DoPut")) {
+    } else if (method.ends_with("/DoPut")) {
       flight_method = FlightMethod::DoPut;
-    } else if (EndsWith(method, "/DoExchange")) {
+    } else if (method.ends_with("/DoExchange")) {
       flight_method = FlightMethod::DoExchange;
-    } else if (EndsWith(method, "/DoAction")) {
+    } else if (method.ends_with("/DoAction")) {
       flight_method = FlightMethod::DoAction;
-    } else if (EndsWith(method, "/ListActions")) {
+    } else if (method.ends_with("/ListActions")) {
       flight_method = FlightMethod::ListActions;
     } else {
       ARROW_LOG(WARNING) << "Unknown Flight method: " << info->method();
diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc
index 759b1410bd..8166513d4e 100644
--- a/cpp/src/arrow/flight/types.cc
+++ b/cpp/src/arrow/flight/types.cc
@@ -1167,7 +1167,7 @@ std::string TransportStatusDetail::ToString() const {
     repr += "{\"";
     repr += key;
     repr += "\", ";
-    if (arrow::internal::EndsWith(key, "-bin")) {
+    if (key.ends_with("-bin")) {
       repr += arrow::util::base64_encode(value);
     } else {
       repr += "\"";
diff --git a/cpp/src/arrow/json/chunker_test.cc 
b/cpp/src/arrow/json/chunker_test.cc
index 1c26d52b14..0976e9ba22 100644
--- a/cpp/src/arrow/json/chunker_test.cc
+++ b/cpp/src/arrow/json/chunker_test.cc
@@ -34,8 +34,6 @@
 
 namespace arrow {
 
-using internal::StartsWith;
-
 namespace json {
 
 // Use no nested objects and no string literals containing braces in this test.
@@ -159,10 +157,10 @@ void AssertStraddledChunking(Chunker& chunker, const 
std::shared_ptr<Buffer>& bu
   AssertChunking(chunker, first_half, 1);
   std::shared_ptr<Buffer> first_whole, partial;
   ASSERT_OK(chunker.Process(first_half, &first_whole, &partial));
-  ASSERT_TRUE(StartsWith(std::string_view(*first_half), 
std::string_view(*first_whole)));
+  
ASSERT_TRUE(std::string_view(*first_half).starts_with(std::string_view(*first_whole)));
   std::shared_ptr<Buffer> completion, rest;
   ASSERT_OK(chunker.ProcessWithPartial(partial, second_half, &completion, 
&rest));
-  ASSERT_TRUE(StartsWith(std::string_view(*second_half), 
std::string_view(*completion)));
+  
ASSERT_TRUE(std::string_view(*second_half).starts_with(std::string_view(*completion)));
   std::shared_ptr<Buffer> straddling;
   ASSERT_OK_AND_ASSIGN(straddling, ConcatenateBuffers({partial, completion}));
   auto length = ConsumeWholeObject(&straddling);
diff --git a/cpp/src/arrow/util/reflection_test.cc 
b/cpp/src/arrow/util/reflection_test.cc
index d2d6379bec..2246c8fe7f 100644
--- a/cpp/src/arrow/util/reflection_test.cc
+++ b/cpp/src/arrow/util/reflection_test.cc
@@ -83,7 +83,7 @@ struct FromStringImpl {
   void Fail() { obj_ = std::nullopt; }
 
   void Init(std::string_view class_name, std::string_view repr, size_t 
num_properties) {
-    if (!StartsWith(repr, class_name)) return Fail();
+    if (!repr.starts_with(class_name)) return Fail();
 
     repr = repr.substr(class_name.size());
     if (repr.empty()) return Fail();
diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h
index d39b7a295e..af8c948f48 100644
--- a/cpp/src/arrow/util/string.h
+++ b/cpp/src/arrow/util/string.h
@@ -52,16 +52,6 @@ ARROW_EXPORT Status ParseHexValues(std::string_view 
hex_string, uint8_t* out);
 
 namespace internal {
 
-/// Like std::string_view::starts_with in C++20
-inline bool StartsWith(std::string_view s, std::string_view prefix) {
-  return s.starts_with(prefix);
-}
-
-/// Like std::string_view::ends_with in C++20
-inline bool EndsWith(std::string_view s, std::string_view suffix) {
-  return s.ends_with(suffix);
-}
-
 /// \brief Split a string with a delimiter
 ARROW_EXPORT
 std::vector<std::string_view> SplitString(std::string_view v, char delim,
diff --git a/cpp/src/arrow/util/string_test.cc 
b/cpp/src/arrow/util/string_test.cc
index f222b938d5..8988eb9996 100644
--- a/cpp/src/arrow/util/string_test.cc
+++ b/cpp/src/arrow/util/string_test.cc
@@ -170,34 +170,6 @@ TEST(SplitString, LimitZero) {
   EXPECT_EQ(parts[2], "c");
 }
 
-TEST(StartsWith, Basics) {
-  std::string empty{};
-  std::string abc{"abc"};
-  std::string abcdef{"abcdef"};
-  std::string def{"def"};
-  ASSERT_TRUE(StartsWith(empty, empty));
-  ASSERT_TRUE(StartsWith(abc, empty));
-  ASSERT_TRUE(StartsWith(abc, abc));
-  ASSERT_TRUE(StartsWith(abcdef, abc));
-  ASSERT_FALSE(StartsWith(abc, abcdef));
-  ASSERT_FALSE(StartsWith(def, abcdef));
-  ASSERT_FALSE(StartsWith(abcdef, def));
-}
-
-TEST(EndsWith, Basics) {
-  std::string empty{};
-  std::string abc{"abc"};
-  std::string abcdef{"abcdef"};
-  std::string def{"def"};
-  ASSERT_TRUE(EndsWith(empty, empty));
-  ASSERT_TRUE(EndsWith(abc, empty));
-  ASSERT_TRUE(EndsWith(abc, abc));
-  ASSERT_TRUE(EndsWith(abcdef, def));
-  ASSERT_FALSE(EndsWith(abcdef, abc));
-  ASSERT_FALSE(EndsWith(def, abcdef));
-  ASSERT_FALSE(EndsWith(abcdef, abc));
-}
-
 TEST(RegexMatch, Basics) {
   std::regex regex("a+(b*)(c+)d+");
   std::string_view b, c;
@@ -260,9 +232,9 @@ TEST(ToChars, FloatingPoint) {
     // to std::to_string which may make ad hoc formatting choices, so we cannot
     // really test much about the result.
     auto result = ToChars(0.0f);
-    ASSERT_TRUE(StartsWith(result, "0")) << result;
+    ASSERT_TRUE(result.starts_with("0")) << result;
     result = ToChars(0.25);
-    ASSERT_TRUE(StartsWith(result, "0.25")) << result;
+    ASSERT_TRUE(result.starts_with("0.25")) << result;
   }
 }
 
diff --git a/cpp/src/parquet/arrow/fuzz_internal.cc 
b/cpp/src/parquet/arrow/fuzz_internal.cc
index b2e295b435..0c9d088ee8 100644
--- a/cpp/src/parquet/arrow/fuzz_internal.cc
+++ b/cpp/src/parquet/arrow/fuzz_internal.cc
@@ -80,7 +80,7 @@ class FuzzDecryptionKeyRetriever : public 
DecryptionKeyRetriever {
       return it->second;
     }
     // Is it a key generated by MakeEncryptionKey?
-    if (::arrow::internal::StartsWith(key_id, kInlineKeyPrefix)) {
+    if (key_id.starts_with(kInlineKeyPrefix)) {
       return SecureString(
           
::arrow::util::base64_decode(key_id.substr(kInlineKeyPrefix.length())));
     }
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index 293ae94b94..266215a810 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -49,7 +49,6 @@ using arrow::FieldVector;
 using arrow::KeyValueMetadata;
 using arrow::Status;
 using arrow::internal::checked_cast;
-using arrow::internal::EndsWith;
 using arrow::internal::ToChars;
 
 using ArrowType = arrow::DataType;
diff --git a/cpp/src/parquet/geospatial/util_json_internal.cc 
b/cpp/src/parquet/geospatial/util_json_internal.cc
index 0ca88f4c6c..6278ab8873 100644
--- a/cpp/src/parquet/geospatial/util_json_internal.cc
+++ b/cpp/src/parquet/geospatial/util_json_internal.cc
@@ -104,10 +104,10 @@ std::string EscapeCrsAsJsonIfRequired(std::string_view 
crs);
   // the format and pass on this information to GeoArrow.
   if (crs.empty()) {
     return R"("crs": "OGC:CRS84", "crs_type": "authority_code")";
-  } else if (::arrow::internal::StartsWith(crs, kSridPrefix)) {
+  } else if (crs.starts_with(kSridPrefix)) {
     return R"("crs": ")" + std::string(crs.substr(kSridPrefix.size())) +
            R"(", "crs_type": "srid")";
-  } else if (::arrow::internal::StartsWith(crs, kProjjsonPrefix)) {
+  } else if (crs.starts_with(kProjjsonPrefix)) {
     std::string_view metadata_field = crs.substr(kProjjsonPrefix.size());
     if (metadata && metadata->Contains(metadata_field)) {
       ARROW_ASSIGN_OR_RAISE(std::string projjson_value, 
metadata->Get(metadata_field));

Reply via email to