This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 8a778857e1 GH-49065: [C++] Remove unnecessary copies of shared_ptr in
Type::BOOL and Type::NA at GrouperImpl (#49066)
8a778857e1 is described below
commit 8a778857e1cbb951c4eed08d134ceb28f0eb7cd5
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Fri Jan 30 17:31:37 2026 +0900
GH-49065: [C++] Remove unnecessary copies of shared_ptr in Type::BOOL and
Type::NA at GrouperImpl (#49066)
### Rationale for this change
The grouper code was creating a `shared_ptr<DataType>` for every key type,
even when it wasn't needed. This resulted in unnecessary reference counting
operations. For example, `BooleanKeyEncoder` and `NullKeyEncoder` don't require
a `shared_ptr` in their constructors, yet we were creating one for every key of
those types.
### What changes are included in this PR?
Changed `GrouperImpl::Make()` to use `TypeHolder` references directly and
only call `GetSharedPtr()` when needed by encoder constructors. This eliminates
`shared_ptr` creation for `Type::BOOL` and `Type::NA` cases. Other encoder
types (dictionary, fixed-width, binary) still require `shared_ptr` since their
constructors take `shared_ptr<DataType>` parameters for ownership.
### Are these changes tested?
Yes, existing tests.
### Are there any user-facing changes?
No.
* GitHub Issue: #49065
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/arrow/compute/row/grouper.cc | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/cpp/src/arrow/compute/row/grouper.cc
b/cpp/src/arrow/compute/row/grouper.cc
index d62333af37..a342e5a6b1 100644
--- a/cpp/src/arrow/compute/row/grouper.cc
+++ b/cpp/src/arrow/compute/row/grouper.cc
@@ -341,43 +341,44 @@ struct GrouperImpl : public Grouper {
impl->ctx_ = ctx;
for (size_t i = 0; i < key_types.size(); ++i) {
- // TODO(wesm): eliminate this probably unneeded shared_ptr copy
- std::shared_ptr<DataType> key = key_types[i].GetSharedPtr();
+ const auto& key_type = key_types[i];
- if (key->id() == Type::BOOL) {
+ if (key_type.id() == Type::BOOL) {
impl->encoders_[i] = std::make_unique<internal::BooleanKeyEncoder>();
continue;
}
- if (key->id() == Type::DICTIONARY) {
- impl->encoders_[i] =
- std::make_unique<internal::DictionaryKeyEncoder>(key,
ctx->memory_pool());
+ if (key_type.id() == Type::DICTIONARY) {
+ impl->encoders_[i] = std::make_unique<internal::DictionaryKeyEncoder>(
+ key_type.GetSharedPtr(), ctx->memory_pool());
continue;
}
- if (is_fixed_width(key->id())) {
- impl->encoders_[i] =
std::make_unique<internal::FixedWidthKeyEncoder>(key);
+ if (is_fixed_width(key_type.id())) {
+ impl->encoders_[i] =
+
std::make_unique<internal::FixedWidthKeyEncoder>(key_type.GetSharedPtr());
continue;
}
- if (is_binary_like(key->id())) {
- impl->encoders_[i] =
- std::make_unique<internal::VarLengthKeyEncoder<BinaryType>>(key);
+ if (is_binary_like(key_type.id())) {
+ impl->encoders_[i] =
std::make_unique<internal::VarLengthKeyEncoder<BinaryType>>(
+ key_type.GetSharedPtr());
continue;
}
- if (is_large_binary_like(key->id())) {
+ if (is_large_binary_like(key_type.id())) {
impl->encoders_[i] =
-
std::make_unique<internal::VarLengthKeyEncoder<LargeBinaryType>>(key);
+ std::make_unique<internal::VarLengthKeyEncoder<LargeBinaryType>>(
+ key_type.GetSharedPtr());
continue;
}
- if (key->id() == Type::NA) {
+ if (key_type.id() == Type::NA) {
impl->encoders_[i] = std::make_unique<internal::NullKeyEncoder>();
continue;
}
- return Status::NotImplemented("Keys of type ", *key);
+ return Status::NotImplemented("Keys of type ", *key_type);
}
return impl;