This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 961258d211 GH-48954: [C++] Add test for null-type dictionary sorting
and clarify XXX comment (#48955)
961258d211 is described below
commit 961258d2111b4a50f47f7ad9f72d6a20fabae983
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Wed Feb 4 23:02:25 2026 +0900
GH-48954: [C++] Add test for null-type dictionary sorting and clarify XXX
comment (#48955)
### Rationale for this change
Null-type dictionaries (e.g., `dictionary(int8(), null())`) are valid Arrow
constructs supported from day one, but the sorting code had an uncertain `XXX
Should this support Type::NA?` comment. We should explicitly support and test
this because other functions already support this:
```python
import pyarrow as pa
import pyarrow.compute as pc
pc.array_sort_indices(pa.array([None, None, None, None], type=pa.int32()))
# [0, 1, 2, 3]
pc.array_sort_indices(pa.DictionaryArray.from_arrays(
indices=pa.array([None, None, None, None], type=pa.int8()),
dictionary=pa.array([], type=pa.null())
))
# [0, 1, 2, 3]
```
I believe it does not make sense to specifically disallow this in
dictionaries at this point.
### What changes are included in this PR?
Added a unittest for null sorting behaviour.
### Are these changes tested?
Yes, the unittest was added.
### Are there any user-facing changes?
No.
* GitHub Issue: #48954
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/compute/kernels/vector_array_sort.cc | 1 -
cpp/src/arrow/compute/kernels/vector_sort_test.cc | 21 +++++++++++++++++++++
2 files changed, 21 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/compute/kernels/vector_array_sort.cc
b/cpp/src/arrow/compute/kernels/vector_array_sort.cc
index 0c27808dd1..6e7068f6ec 100644
--- a/cpp/src/arrow/compute/kernels/vector_array_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_array_sort.cc
@@ -235,7 +235,6 @@ class ArrayCompareSorter<DictionaryType> {
RankOptions rank_options(SortOrder::Ascending, NullPlacement::AtEnd,
RankOptions::Dense);
- // XXX Should this support Type::NA?
auto data = array->data();
std::shared_ptr<Buffer> null_bitmap;
if (array->null_count() > 0) {
diff --git a/cpp/src/arrow/compute/kernels/vector_sort_test.cc
b/cpp/src/arrow/compute/kernels/vector_sort_test.cc
index 90f8eb7a56..e18fcf3771 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort_test.cc
@@ -437,6 +437,27 @@ TEST(ArraySortIndicesFunction, AllNullDictionaryArray) {
}
}
+TEST(ArraySortIndicesFunction, NullTypeDictionaryArray) {
+ // Test that dictionaries with Type::NA (null type) values can be sorted.
+ // All values in a null-type dictionary are logically null, so sorting
+ // should just arrange indices based on null placement, preserving order.
+ for (const auto& index_type : all_dictionary_index_types()) {
+ ARROW_SCOPED_TRACE("index_type = ", index_type->ToString());
+ auto dict_type = dictionary(index_type, null());
+ auto dict_arr = DictArrayFromJSON(dict_type, "[null, 0, 0, null]",
"[null]");
+
+ for (auto null_placement : AllNullPlacements()) {
+ ArraySortOptions options{SortOrder::Ascending, null_placement};
+ // All nulls, so output should be identity permutation
+ auto expected = ArrayFromJSON(uint64(), "[0, 1, 2, 3]");
+ ASSERT_OK_AND_ASSIGN(auto actual,
+ CallFunction("array_sort_indices", {dict_arr},
&options));
+ ValidateOutput(actual);
+ AssertDatumsEqual(expected, actual, /*verbose=*/true);
+ }
+ }
+}
+
Result<std::shared_ptr<Array>> DecodeDictionary(const Array& array) {
const auto& dict_array = checked_cast<const DictionaryArray&>(array);
ARROW_ASSIGN_OR_RAISE(auto decoded_datum,