(arrow) branch main updated: GH-33459: [C++][Python] Support step >= 1 in list_slice kernel (#48769)

alenka Thu, 05 Feb 2026 00:57:54 -0800

This is an automated email from the ASF dual-hosted git repository.

alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 49423f8d3b GH-33459: [C++][Python] Support step >= 1 in list_slice 
kernel (#48769)
49423f8d3b is described below

commit 49423f8d3bc251807d69d66537d3e22302e4176f
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Thu Feb 5 17:57:35 2026 +0900

    GH-33459: [C++][Python] Support step >= 1 in list_slice kernel (#48769)
    
    ### Rationale for this change
    
    Closes ARROW-18281, which has been open since 2022. The `list_slice` kernel 
currently rejects `start == stop`, but should return empty lists instead 
(following Python slicing semantics).
    
    The implementation already handles this case correctly. When ARROW-18282 
added step support, `bit_util::CeilDiv(stop - start, step)` naturally returns 0 
for `start == stop`, producing empty lists. The only issue was the validation 
check (`start >= stop`) that prevented this from working.
    
    ### What changes are included in this PR?
    
    - Changed validation from `start >= stop` to `start > stop`
    - Updated error message
    - Added test cases
    
    ### Are these changes tested?
    
    Yes, tests were added.
    
    ### Are there any user-facing changes?
    
    Yes.
    
    ```python
    import pyarrow.compute as pc
    pc.list_slice([[1,2,3]], 0, 0)
    ```
    
    Before:
    
    ```
    pyarrow.lib.ArrowInvalid: `start`(0) should be greater than 0 and smaller 
than `stop`(0)
    ```
    
    After:
    
    ```
    <pyarrow.lib.ListArray object at 0x1a01b8b20>
    [
      []
    ]
    ```
    * GitHub Issue: #33459
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: AlenkaF <[email protected]>
---
 cpp/src/arrow/compute/kernels/scalar_nested.cc     | 16 ++++++++------
 .../arrow/compute/kernels/scalar_nested_test.cc    | 25 +++++++++++-----------
 python/pyarrow/tests/test_compute.py               | 14 ++++++------
 3 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/cpp/src/arrow/compute/kernels/scalar_nested.cc 
b/cpp/src/arrow/compute/kernels/scalar_nested.cc
index 1fb0df56bb..e9c65aff1c 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested.cc
@@ -162,7 +162,8 @@ Result<TypeHolder> ListSliceOutputType(const 
ListSliceOptions& opts,
           "`stop` being set.");
     }
     if (opts.step < 1) {
-      return Status::Invalid("`step` must be >= 1, got: ", opts.step);
+      return Status::Invalid("`step` must be greater than or equal to 1, got: 
",
+                             opts.step);
     }
     const auto length = ListSliceLength(opts.start, opts.step, *stop);
     return fixed_size_list(value_type, static_cast<int32_t>(length));
@@ -183,14 +184,15 @@ struct ListSlice {
     const auto* list_type = checked_cast<const BaseListType*>(list_array.type);
 
     // Pre-conditions
-    if (opts.start < 0 || (opts.stop.has_value() && opts.start >= 
opts.stop.value())) {
-      // TODO(ARROW-18281): support start == stop which should give empty lists
-      return Status::Invalid("`start`(", opts.start,
-                             ") should be greater than 0 and smaller than 
`stop`(",
-                             ToString(opts.stop), ")");
+    if (opts.start < 0 || (opts.stop.has_value() && opts.start > 
opts.stop.value())) {
+      return Status::Invalid(
+          "`start`(", opts.start,
+          ") should be greater than or equal to 0 and not greater than 
`stop`(",
+          ToString(opts.stop), ")");
     }
     if (opts.step < 1) {
-      return Status::Invalid("`step` must be >= 1, got: ", opts.step);
+      return Status::Invalid("`step` must be greater than or equal to 1, got: 
",
+                             opts.step);
     }
 
     auto* pool = ctx->memory_pool();
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc 
b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
index f199f56aa2..b5a68d12cb 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
@@ -176,6 +176,12 @@ TEST(TestScalarNested, ListSliceVariableOutput) {
   auto input = ArrayFromJSON(fixed_size_list(int32(), 1), "[[1]]");
   auto expected = ArrayFromJSON(list(int32()), "[[1]]");
   CheckScalarUnary("list_slice", input, expected, &args);
+
+  args.start = 0;
+  args.stop = 0;
+  auto input_empty = ArrayFromJSON(list(int32()), "[[1, 2, 3], [4, 5], null]");
+  auto expected_empty = ArrayFromJSON(list(int32()), "[[], [], null]");
+  CheckScalarUnary("list_slice", input_empty, expected_empty, &args);
 }
 
 TEST(TestScalarNested, ListSliceFixedOutput) {
@@ -315,7 +321,8 @@ TEST(TestScalarNested, ListSliceBadParameters) {
   EXPECT_RAISES_WITH_MESSAGE_THAT(
       Invalid,
       ::testing::HasSubstr(
-          "`start`(-1) should be greater than 0 and smaller than `stop`(1)"),
+          "`start`(-1) should be greater than or equal to 0 and not greater 
than "
+          "`stop`(1)"),
       CallFunction("list_slice", {input}, &args));
   // start greater than stop
   args.start = 1;
@@ -323,14 +330,8 @@ TEST(TestScalarNested, ListSliceBadParameters) {
   EXPECT_RAISES_WITH_MESSAGE_THAT(
       Invalid,
       ::testing::HasSubstr(
-          "`start`(1) should be greater than 0 and smaller than `stop`(0)"),
-      CallFunction("list_slice", {input}, &args));
-  // start same as stop
-  args.stop = args.start;
-  EXPECT_RAISES_WITH_MESSAGE_THAT(
-      Invalid,
-      ::testing::HasSubstr(
-          "`start`(1) should be greater than 0 and smaller than `stop`(1)"),
+          "`start`(1) should be greater than or equal to 0 and not greater 
than "
+          "`stop`(0)"),
       CallFunction("list_slice", {input}, &args));
   // stop not set and FixedSizeList requested with variable sized input
   args.stop = std::nullopt;
@@ -343,9 +344,9 @@ TEST(TestScalarNested, ListSliceBadParameters) {
   args.start = 0;
   args.stop = 2;
   args.step = 0;
-  EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
-                                  ::testing::HasSubstr("`step` must be >= 1, 
got: 0"),
-                                  CallFunction("list_slice", {input}, &args));
+  EXPECT_RAISES_WITH_MESSAGE_THAT(
+      Invalid, ::testing::HasSubstr("`step` must be greater than or equal to 
1, got: 0"),
+      CallFunction("list_slice", {input}, &args));
 }
 
 TEST(TestScalarNested, StructField) {
diff --git a/python/pyarrow/tests/test_compute.py 
b/python/pyarrow/tests/test_compute.py
index d8a1c4d093..2ef14ff39b 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -3930,7 +3930,8 @@ def test_list_slice_output_fixed(start, stop, step, 
expected, value_type,
     (0, 1,),
     (0, 2,),
     (1, 2,),
-    (2, 4,)
+    (2, 4,),
+    (0, 0,)
 ))
 @pytest.mark.parametrize("step", (1, 2))
 @pytest.mark.parametrize("value_type", (pa.string, pa.int16, pa.float64))
@@ -3978,18 +3979,17 @@ def 
test_list_slice_field_names_retained(return_fixed_size, type):
 
 def test_list_slice_bad_parameters():
     arr = pa.array([[1]], pa.list_(pa.int8(), 1))
-    msg = r"`start`(.*) should be greater than 0 and smaller than `stop`(.*)"
+    msg = (
+        r"`start`(.*) should be greater than or equal to 0 "
+        r"and not greater than `stop`(.*)"
+    )
     with pytest.raises(pa.ArrowInvalid, match=msg):
         pc.list_slice(arr, -1, 1)  # negative start?
     with pytest.raises(pa.ArrowInvalid, match=msg):
         pc.list_slice(arr, 2, 1)  # start > stop?
 
-    # TODO(ARROW-18281): start==stop -> empty lists
-    with pytest.raises(pa.ArrowInvalid, match=msg):
-        pc.list_slice(arr, 0, 0)  # start == stop?
-
     # Step not >= 1
-    msg = "`step` must be >= 1, got: "
+    msg = "`step` must be greater than or equal to 1, got: "
     with pytest.raises(pa.ArrowInvalid, match=msg + "0"):
         pc.list_slice(arr, 0, 1, step=0)
     with pytest.raises(pa.ArrowInvalid, match=msg + "-1"):

(arrow) branch main updated: GH-33459: [C++][Python] Support step >= 1 in list_slice kernel (#48769)

Reply via email to