This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 35717a7aef GH-48592: [C++] Use starts_with/ends_with methods (#48614)
35717a7aef is described below
commit 35717a7aef295738b59d3c4a1f31d6bce4f81f8f
Author: Jonah Kelman <[email protected]>
AuthorDate: Mon Jan 5 13:14:50 2026 -0500
GH-48592: [C++] Use starts_with/ends_with methods (#48614)
### Rationale for this change
The code previously used the StartsWith and EndsWith utility functions. C++20
provides starts_with and ends_with member functions on std::string and
std::string_view, rendering StartsWith and EndsWith unnecessary.
### What changes are included in this PR?
All calls of StartsWith and EndsWith are replaced with starts_with and
ends_with, respectively. The definition and tests for StartsWith and EndsWith
have been deleted.
### Are these changes tested?
Yes, all unit tests pass.
### Are there any user-facing changes?
No.
* GitHub Issue: #48592
Authored-by: Jonah Kelman <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/acero/tpch_node_test.cc | 4 +--
cpp/src/arrow/compute/expression.cc | 3 +-
.../arrow/compute/kernels/scalar_string_ascii.cc | 7 ++---
cpp/src/arrow/dataset/discovery.cc | 4 +--
cpp/src/arrow/dataset/subtree_test.cc | 4 +--
.../arrow/engine/simple_extension_type_internal.h | 2 +-
.../arrow/engine/substrait/relation_internal.cc | 1 -
cpp/src/arrow/filesystem/azurefs.cc | 4 +--
cpp/src/arrow/filesystem/azurefs_test.cc | 3 +-
cpp/src/arrow/filesystem/path_util.cc | 6 ++--
cpp/src/arrow/filesystem/s3fs.cc | 3 +-
cpp/src/arrow/flight/transport/grpc/grpc_client.cc | 22 +++++++--------
cpp/src/arrow/flight/types.cc | 2 +-
cpp/src/arrow/json/chunker_test.cc | 6 ++--
cpp/src/arrow/util/reflection_test.cc | 2 +-
cpp/src/arrow/util/string.h | 10 -------
cpp/src/arrow/util/string_test.cc | 32 ++--------------------
cpp/src/parquet/arrow/fuzz_internal.cc | 2 +-
cpp/src/parquet/arrow/schema.cc | 1 -
cpp/src/parquet/geospatial/util_json_internal.cc | 4 +--
20 files changed, 33 insertions(+), 89 deletions(-)
diff --git a/cpp/src/arrow/acero/tpch_node_test.cc
b/cpp/src/arrow/acero/tpch_node_test.cc
index f484d6c9d5..6321a8ca02 100644
--- a/cpp/src/arrow/acero/tpch_node_test.cc
+++ b/cpp/src/arrow/acero/tpch_node_test.cc
@@ -38,8 +38,6 @@
namespace arrow {
-using arrow::internal::StartsWith;
-
namespace acero {
namespace internal {
@@ -100,7 +98,7 @@ void VerifyUniqueKey(std::unordered_set<int32_t>* seen,
const Datum& d, int32_t
void VerifyStringAndNumber_Single(std::string_view row, std::string_view
prefix,
const int64_t i, const int32_t* nums,
bool verify_padding) {
- ASSERT_TRUE(StartsWith(row, prefix)) << row << ", prefix=" << prefix << ",
i=" << i;
+ ASSERT_TRUE(row.starts_with(prefix)) << row << ", prefix=" << prefix << ",
i=" << i;
const char* num_str = row.data() + prefix.size();
const char* num_str_end = row.data() + row.size();
int64_t num = 0;
diff --git a/cpp/src/arrow/compute/expression.cc
b/cpp/src/arrow/compute/expression.cc
index 2563674a59..93427f75fe 100644
--- a/cpp/src/arrow/compute/expression.cc
+++ b/cpp/src/arrow/compute/expression.cc
@@ -47,7 +47,6 @@ namespace arrow {
using internal::checked_cast;
using internal::checked_pointer_cast;
-using internal::EndsWith;
using internal::ToChars;
namespace compute {
@@ -180,7 +179,7 @@ std::string Expression::ToString() const {
}
constexpr std::string_view kleene = "_kleene";
- if (EndsWith(call->function_name, kleene)) {
+ if (call->function_name.ends_with(kleene)) {
auto op = call->function_name.substr(0, call->function_name.size() -
kleene.size());
return binary(std::move(op));
}
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc
b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc
index dd5abed16c..d4dda5556b 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc
@@ -38,9 +38,6 @@
namespace arrow {
-using internal::EndsWith;
-using internal::StartsWith;
-
namespace compute {
namespace internal {
@@ -1291,7 +1288,7 @@ struct PlainStartsWithMatcher {
}
bool Match(std::string_view current) const {
- return StartsWith(current, options_.pattern);
+ return current.starts_with(options_.pattern);
}
};
@@ -1309,7 +1306,7 @@ struct PlainEndsWithMatcher {
}
bool Match(std::string_view current) const {
- return EndsWith(current, options_.pattern);
+ return current.ends_with(options_.pattern);
}
};
diff --git a/cpp/src/arrow/dataset/discovery.cc
b/cpp/src/arrow/dataset/discovery.cc
index 5686e50e3c..c6f6e41d1e 100644
--- a/cpp/src/arrow/dataset/discovery.cc
+++ b/cpp/src/arrow/dataset/discovery.cc
@@ -35,8 +35,6 @@
namespace arrow {
-using internal::StartsWith;
-
namespace dataset {
namespace {
@@ -49,7 +47,7 @@ bool StartsWithAnyOf(const std::string& path, const
std::vector<std::string>& pr
auto parts = fs::internal::SplitAbstractPath(path);
return std::any_of(parts.cbegin(), parts.cend(), [&](std::string_view part) {
return std::any_of(prefixes.cbegin(), prefixes.cend(),
- [&](std::string_view prefix) { return StartsWith(part,
prefix); });
+ [&](std::string_view prefix) { return
part.starts_with(prefix); });
});
}
diff --git a/cpp/src/arrow/dataset/subtree_test.cc
b/cpp/src/arrow/dataset/subtree_test.cc
index fc13c20ece..51c7d47acb 100644
--- a/cpp/src/arrow/dataset/subtree_test.cc
+++ b/cpp/src/arrow/dataset/subtree_test.cc
@@ -31,8 +31,6 @@
namespace arrow {
-using internal::StartsWith;
-
using compute::field_ref;
using compute::literal;
@@ -112,7 +110,7 @@ bool IsAncestorOf(std::string_view ancestor,
std::string_view descendant) {
ancestor = RemoveTrailingSlash(ancestor);
if (ancestor == "") return true;
descendant = RemoveTrailingSlash(descendant);
- if (!StartsWith(descendant, ancestor)) return false;
+ if (!descendant.starts_with(ancestor)) return false;
descendant.remove_prefix(ancestor.size());
if (descendant.empty()) return true;
return descendant.front() == '/';
diff --git a/cpp/src/arrow/engine/simple_extension_type_internal.h
b/cpp/src/arrow/engine/simple_extension_type_internal.h
index 1867fe5045..5124b6d294 100644
--- a/cpp/src/arrow/engine/simple_extension_type_internal.h
+++ b/cpp/src/arrow/engine/simple_extension_type_internal.h
@@ -111,7 +111,7 @@ class SimpleExtensionType : public ExtensionType {
void Fail() { params_ = std::nullopt; }
void Init(std::string_view class_name, std::string_view repr, size_t
num_properties) {
- if (!::arrow::internal::StartsWith(repr, class_name)) return Fail();
+ if (!repr.starts_with(class_name)) return Fail();
repr = repr.substr(class_name.size());
if (repr.empty()) return Fail();
diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc
b/cpp/src/arrow/engine/substrait/relation_internal.cc
index 1ea143f9c5..b9e663ed7b 100644
--- a/cpp/src/arrow/engine/substrait/relation_internal.cc
+++ b/cpp/src/arrow/engine/substrait/relation_internal.cc
@@ -65,7 +65,6 @@
namespace arrow {
using internal::checked_cast;
-using internal::StartsWith;
using internal::ToChars;
using util::UriFromAbsolutePath;
diff --git a/cpp/src/arrow/filesystem/azurefs.cc
b/cpp/src/arrow/filesystem/azurefs.cc
index 0ca18eed51..a3a162616e 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -70,10 +70,10 @@ void AzureOptions::ExtractFromUriSchemeAndHierPart(const
Uri& uri,
std::string* out_path) {
const auto host = uri.host();
std::string path;
- if (arrow::internal::EndsWith(host, blob_storage_authority)) {
+ if (host.ends_with(blob_storage_authority)) {
account_name = host.substr(0, host.size() - blob_storage_authority.size());
path = internal::RemoveLeadingSlash(uri.path());
- } else if (arrow::internal::EndsWith(host, dfs_storage_authority)) {
+ } else if (host.ends_with(dfs_storage_authority)) {
account_name = host.substr(0, host.size() - dfs_storage_authority.size());
path = internal::ConcatAbstractPath(uri.username(), uri.path());
} else {
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc
b/cpp/src/arrow/filesystem/azurefs_test.cc
index 43d1c2afb7..c3af6fb079 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -2856,8 +2856,7 @@ std::shared_ptr<const KeyValueMetadata>
NormalizerKeyValueMetadata(
value = "2023-10-31T08:15:20Z";
}
} else if (key == "ETag") {
- if (arrow::internal::StartsWith(value, "\"") &&
- arrow::internal::EndsWith(value, "\"")) {
+ if (value.starts_with("\"") && value.ends_with("\"")) {
// Valid value
value = "\"ETagValue\"";
}
diff --git a/cpp/src/arrow/filesystem/path_util.cc
b/cpp/src/arrow/filesystem/path_util.cc
index a48a34135a..dc82afd07e 100644
--- a/cpp/src/arrow/filesystem/path_util.cc
+++ b/cpp/src/arrow/filesystem/path_util.cc
@@ -29,8 +29,6 @@
namespace arrow {
-using internal::StartsWith;
-
namespace fs {
namespace internal {
@@ -236,7 +234,7 @@ bool IsAncestorOf(std::string_view ancestor,
std::string_view descendant) {
}
descendant = RemoveTrailingSlash(descendant);
- if (!StartsWith(descendant, ancestor)) {
+ if (!descendant.starts_with(ancestor)) {
// an ancestor path is a prefix of descendant paths
return false;
}
@@ -249,7 +247,7 @@ bool IsAncestorOf(std::string_view ancestor,
std::string_view descendant) {
}
// "/hello/w" is not an ancestor of "/hello/world"
- return StartsWith(descendant, std::string{kSep});
+ return descendant.starts_with(std::string{kSep});
}
std::optional<std::string_view> RemoveAncestor(std::string_view ancestor,
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index c6b821f5de..f75fd970a1 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -1405,9 +1405,10 @@ bool IsDirectory(std::string_view key, const
S3Model::HeadObjectResult& result)
}
// Otherwise, if its content type starts with "application/x-directory",
// it's a directory
- if (::arrow::internal::StartsWith(result.GetContentType(),
kAwsDirectoryContentType)) {
+ if (result.GetContentType().starts_with(kAwsDirectoryContentType)) {
return true;
}
+
// Otherwise, it's a regular file.
return false;
}
diff --git a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
index a2af830db7..737ec5fe91 100644
--- a/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
+++ b/cpp/src/arrow/flight/transport/grpc/grpc_client.cc
@@ -62,8 +62,6 @@
namespace arrow {
-using internal::EndsWith;
-
namespace flight {
namespace transport {
namespace grpc {
@@ -176,25 +174,25 @@ class GrpcClientInterceptorAdapterFactory
FlightMethod flight_method = FlightMethod::Invalid;
std::string_view method(info->method());
- if (EndsWith(method, "/Handshake")) {
+ if (method.ends_with("/Handshake")) {
flight_method = FlightMethod::Handshake;
- } else if (EndsWith(method, "/ListFlights")) {
+ } else if (method.ends_with("/ListFlights")) {
flight_method = FlightMethod::ListFlights;
- } else if (EndsWith(method, "/GetFlightInfo")) {
+ } else if (method.ends_with("/GetFlightInfo")) {
flight_method = FlightMethod::GetFlightInfo;
- } else if (EndsWith(method, "/PollFlightInfo")) {
+ } else if (method.ends_with("/PollFlightInfo")) {
flight_method = FlightMethod::PollFlightInfo;
- } else if (EndsWith(method, "/GetSchema")) {
+ } else if (method.ends_with("/GetSchema")) {
flight_method = FlightMethod::GetSchema;
- } else if (EndsWith(method, "/DoGet")) {
+ } else if (method.ends_with("/DoGet")) {
flight_method = FlightMethod::DoGet;
- } else if (EndsWith(method, "/DoPut")) {
+ } else if (method.ends_with("/DoPut")) {
flight_method = FlightMethod::DoPut;
- } else if (EndsWith(method, "/DoExchange")) {
+ } else if (method.ends_with("/DoExchange")) {
flight_method = FlightMethod::DoExchange;
- } else if (EndsWith(method, "/DoAction")) {
+ } else if (method.ends_with("/DoAction")) {
flight_method = FlightMethod::DoAction;
- } else if (EndsWith(method, "/ListActions")) {
+ } else if (method.ends_with("/ListActions")) {
flight_method = FlightMethod::ListActions;
} else {
ARROW_LOG(WARNING) << "Unknown Flight method: " << info->method();
diff --git a/cpp/src/arrow/flight/types.cc b/cpp/src/arrow/flight/types.cc
index 759b1410bd..8166513d4e 100644
--- a/cpp/src/arrow/flight/types.cc
+++ b/cpp/src/arrow/flight/types.cc
@@ -1167,7 +1167,7 @@ std::string TransportStatusDetail::ToString() const {
repr += "{\"";
repr += key;
repr += "\", ";
- if (arrow::internal::EndsWith(key, "-bin")) {
+ if (key.ends_with("-bin")) {
repr += arrow::util::base64_encode(value);
} else {
repr += "\"";
diff --git a/cpp/src/arrow/json/chunker_test.cc
b/cpp/src/arrow/json/chunker_test.cc
index 1c26d52b14..0976e9ba22 100644
--- a/cpp/src/arrow/json/chunker_test.cc
+++ b/cpp/src/arrow/json/chunker_test.cc
@@ -34,8 +34,6 @@
namespace arrow {
-using internal::StartsWith;
-
namespace json {
// Use no nested objects and no string literals containing braces in this test.
@@ -159,10 +157,10 @@ void AssertStraddledChunking(Chunker& chunker, const
std::shared_ptr<Buffer>& bu
AssertChunking(chunker, first_half, 1);
std::shared_ptr<Buffer> first_whole, partial;
ASSERT_OK(chunker.Process(first_half, &first_whole, &partial));
- ASSERT_TRUE(StartsWith(std::string_view(*first_half),
std::string_view(*first_whole)));
+
ASSERT_TRUE(std::string_view(*first_half).starts_with(std::string_view(*first_whole)));
std::shared_ptr<Buffer> completion, rest;
ASSERT_OK(chunker.ProcessWithPartial(partial, second_half, &completion,
&rest));
- ASSERT_TRUE(StartsWith(std::string_view(*second_half),
std::string_view(*completion)));
+
ASSERT_TRUE(std::string_view(*second_half).starts_with(std::string_view(*completion)));
std::shared_ptr<Buffer> straddling;
ASSERT_OK_AND_ASSIGN(straddling, ConcatenateBuffers({partial, completion}));
auto length = ConsumeWholeObject(&straddling);
diff --git a/cpp/src/arrow/util/reflection_test.cc
b/cpp/src/arrow/util/reflection_test.cc
index d2d6379bec..2246c8fe7f 100644
--- a/cpp/src/arrow/util/reflection_test.cc
+++ b/cpp/src/arrow/util/reflection_test.cc
@@ -83,7 +83,7 @@ struct FromStringImpl {
void Fail() { obj_ = std::nullopt; }
void Init(std::string_view class_name, std::string_view repr, size_t
num_properties) {
- if (!StartsWith(repr, class_name)) return Fail();
+ if (!repr.starts_with(class_name)) return Fail();
repr = repr.substr(class_name.size());
if (repr.empty()) return Fail();
diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h
index d39b7a295e..af8c948f48 100644
--- a/cpp/src/arrow/util/string.h
+++ b/cpp/src/arrow/util/string.h
@@ -52,16 +52,6 @@ ARROW_EXPORT Status ParseHexValues(std::string_view
hex_string, uint8_t* out);
namespace internal {
-/// Like std::string_view::starts_with in C++20
-inline bool StartsWith(std::string_view s, std::string_view prefix) {
- return s.starts_with(prefix);
-}
-
-/// Like std::string_view::ends_with in C++20
-inline bool EndsWith(std::string_view s, std::string_view suffix) {
- return s.ends_with(suffix);
-}
-
/// \brief Split a string with a delimiter
ARROW_EXPORT
std::vector<std::string_view> SplitString(std::string_view v, char delim,
diff --git a/cpp/src/arrow/util/string_test.cc
b/cpp/src/arrow/util/string_test.cc
index f222b938d5..8988eb9996 100644
--- a/cpp/src/arrow/util/string_test.cc
+++ b/cpp/src/arrow/util/string_test.cc
@@ -170,34 +170,6 @@ TEST(SplitString, LimitZero) {
EXPECT_EQ(parts[2], "c");
}
-TEST(StartsWith, Basics) {
- std::string empty{};
- std::string abc{"abc"};
- std::string abcdef{"abcdef"};
- std::string def{"def"};
- ASSERT_TRUE(StartsWith(empty, empty));
- ASSERT_TRUE(StartsWith(abc, empty));
- ASSERT_TRUE(StartsWith(abc, abc));
- ASSERT_TRUE(StartsWith(abcdef, abc));
- ASSERT_FALSE(StartsWith(abc, abcdef));
- ASSERT_FALSE(StartsWith(def, abcdef));
- ASSERT_FALSE(StartsWith(abcdef, def));
-}
-
-TEST(EndsWith, Basics) {
- std::string empty{};
- std::string abc{"abc"};
- std::string abcdef{"abcdef"};
- std::string def{"def"};
- ASSERT_TRUE(EndsWith(empty, empty));
- ASSERT_TRUE(EndsWith(abc, empty));
- ASSERT_TRUE(EndsWith(abc, abc));
- ASSERT_TRUE(EndsWith(abcdef, def));
- ASSERT_FALSE(EndsWith(abcdef, abc));
- ASSERT_FALSE(EndsWith(def, abcdef));
- ASSERT_FALSE(EndsWith(abcdef, abc));
-}
-
TEST(RegexMatch, Basics) {
std::regex regex("a+(b*)(c+)d+");
std::string_view b, c;
@@ -260,9 +232,9 @@ TEST(ToChars, FloatingPoint) {
// to std::to_string which may make ad hoc formatting choices, so we cannot
// really test much about the result.
auto result = ToChars(0.0f);
- ASSERT_TRUE(StartsWith(result, "0")) << result;
+ ASSERT_TRUE(result.starts_with("0")) << result;
result = ToChars(0.25);
- ASSERT_TRUE(StartsWith(result, "0.25")) << result;
+ ASSERT_TRUE(result.starts_with("0.25")) << result;
}
}
diff --git a/cpp/src/parquet/arrow/fuzz_internal.cc
b/cpp/src/parquet/arrow/fuzz_internal.cc
index b2e295b435..0c9d088ee8 100644
--- a/cpp/src/parquet/arrow/fuzz_internal.cc
+++ b/cpp/src/parquet/arrow/fuzz_internal.cc
@@ -80,7 +80,7 @@ class FuzzDecryptionKeyRetriever : public
DecryptionKeyRetriever {
return it->second;
}
// Is it a key generated by MakeEncryptionKey?
- if (::arrow::internal::StartsWith(key_id, kInlineKeyPrefix)) {
+ if (key_id.starts_with(kInlineKeyPrefix)) {
return SecureString(
::arrow::util::base64_decode(key_id.substr(kInlineKeyPrefix.length())));
}
diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc
index 293ae94b94..266215a810 100644
--- a/cpp/src/parquet/arrow/schema.cc
+++ b/cpp/src/parquet/arrow/schema.cc
@@ -49,7 +49,6 @@ using arrow::FieldVector;
using arrow::KeyValueMetadata;
using arrow::Status;
using arrow::internal::checked_cast;
-using arrow::internal::EndsWith;
using arrow::internal::ToChars;
using ArrowType = arrow::DataType;
diff --git a/cpp/src/parquet/geospatial/util_json_internal.cc
b/cpp/src/parquet/geospatial/util_json_internal.cc
index 0ca88f4c6c..6278ab8873 100644
--- a/cpp/src/parquet/geospatial/util_json_internal.cc
+++ b/cpp/src/parquet/geospatial/util_json_internal.cc
@@ -104,10 +104,10 @@ std::string EscapeCrsAsJsonIfRequired(std::string_view
crs);
// the format and pass on this information to GeoArrow.
if (crs.empty()) {
return R"("crs": "OGC:CRS84", "crs_type": "authority_code")";
- } else if (::arrow::internal::StartsWith(crs, kSridPrefix)) {
+ } else if (crs.starts_with(kSridPrefix)) {
return R"("crs": ")" + std::string(crs.substr(kSridPrefix.size())) +
R"(", "crs_type": "srid")";
- } else if (::arrow::internal::StartsWith(crs, kProjjsonPrefix)) {
+ } else if (crs.starts_with(kProjjsonPrefix)) {
std::string_view metadata_field = crs.substr(kProjjsonPrefix.size());
if (metadata && metadata->Contains(metadata_field)) {
ARROW_ASSIGN_OR_RAISE(std::string projjson_value,
metadata->Get(metadata_field));