This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 961258d211 GH-48954: [C++] Add test for null-type dictionary sorting 
and clarify XXX comment (#48955)
961258d211 is described below

commit 961258d2111b4a50f47f7ad9f72d6a20fabae983
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Wed Feb 4 23:02:25 2026 +0900

    GH-48954: [C++] Add test for null-type dictionary sorting and clarify XXX 
comment (#48955)
    
    ### Rationale for this change
    
    Null-type dictionaries (e.g., `dictionary(int8(), null())`) are valid Arrow 
constructs supported from day one, but the sorting code had an uncertain `XXX 
Should this support Type::NA?` comment. We should explicitly support and test 
this because other functions already support this:
    
    ```python
    import pyarrow as pa
    import pyarrow.compute as pc
    
    pc.array_sort_indices(pa.array([None, None, None, None], type=pa.int32()))
    # [0, 1, 2, 3]
    pc.array_sort_indices(pa.DictionaryArray.from_arrays(
        indices=pa.array([None, None, None, None], type=pa.int8()),
        dictionary=pa.array([], type=pa.null())
    ))
    # [0, 1, 2, 3]
    ```
    
    I believe it does not make sense to specifically disallow this in 
dictionaries at this point.
    
    ### What changes are included in this PR?
    
    Added a unittest for null sorting behaviour.
    
    ### Are these changes tested?
    
    Yes, the unittest was added.
    
    ### Are there any user-facing changes?
    
    No.
    * GitHub Issue: #48954
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/compute/kernels/vector_array_sort.cc |  1 -
 cpp/src/arrow/compute/kernels/vector_sort_test.cc  | 21 +++++++++++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/kernels/vector_array_sort.cc 
b/cpp/src/arrow/compute/kernels/vector_array_sort.cc
index 0c27808dd1..6e7068f6ec 100644
--- a/cpp/src/arrow/compute/kernels/vector_array_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_array_sort.cc
@@ -235,7 +235,6 @@ class ArrayCompareSorter<DictionaryType> {
     RankOptions rank_options(SortOrder::Ascending, NullPlacement::AtEnd,
                              RankOptions::Dense);
 
-    // XXX Should this support Type::NA?
     auto data = array->data();
     std::shared_ptr<Buffer> null_bitmap;
     if (array->null_count() > 0) {
diff --git a/cpp/src/arrow/compute/kernels/vector_sort_test.cc 
b/cpp/src/arrow/compute/kernels/vector_sort_test.cc
index 90f8eb7a56..e18fcf3771 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort_test.cc
@@ -437,6 +437,27 @@ TEST(ArraySortIndicesFunction, AllNullDictionaryArray) {
   }
 }
 
+TEST(ArraySortIndicesFunction, NullTypeDictionaryArray) {
+  // Test that dictionaries with Type::NA (null type) values can be sorted.
+  // All values in a null-type dictionary are logically null, so sorting
+  // should just arrange indices based on null placement, preserving order.
+  for (const auto& index_type : all_dictionary_index_types()) {
+    ARROW_SCOPED_TRACE("index_type = ", index_type->ToString());
+    auto dict_type = dictionary(index_type, null());
+    auto dict_arr = DictArrayFromJSON(dict_type, "[null, 0, 0, null]", 
"[null]");
+
+    for (auto null_placement : AllNullPlacements()) {
+      ArraySortOptions options{SortOrder::Ascending, null_placement};
+      // All nulls, so output should be identity permutation
+      auto expected = ArrayFromJSON(uint64(), "[0, 1, 2, 3]");
+      ASSERT_OK_AND_ASSIGN(auto actual,
+                           CallFunction("array_sort_indices", {dict_arr}, 
&options));
+      ValidateOutput(actual);
+      AssertDatumsEqual(expected, actual, /*verbose=*/true);
+    }
+  }
+}
+
 Result<std::shared_ptr<Array>> DecodeDictionary(const Array& array) {
   const auto& dict_array = checked_cast<const DictionaryArray&>(array);
   ARROW_ASSIGN_OR_RAISE(auto decoded_datum,

Reply via email to