This is an automated email from the ASF dual-hosted git repository.
alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 49423f8d3b GH-33459: [C++][Python] Support step >= 1 in list_slice
kernel (#48769)
49423f8d3b is described below
commit 49423f8d3bc251807d69d66537d3e22302e4176f
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Thu Feb 5 17:57:35 2026 +0900
GH-33459: [C++][Python] Support step >= 1 in list_slice kernel (#48769)
### Rationale for this change
Closes ARROW-18281, which has been open since 2022. The `list_slice` kernel
currently rejects `start == stop`, but should return empty lists instead
(following Python slicing semantics).
The implementation already handles this case correctly. When ARROW-18282
added step support, `bit_util::CeilDiv(stop - start, step)` naturally returns 0
for `start == stop`, producing empty lists. The only issue was the validation
check (`start >= stop`) that prevented this from working.
### What changes are included in this PR?
- Changed validation from `start >= stop` to `start > stop`
- Updated error message
- Added test cases
### Are these changes tested?
Yes, tests were added.
### Are there any user-facing changes?
Yes.
```python
import pyarrow.compute as pc
pc.list_slice([[1,2,3]], 0, 0)
```
Before:
```
pyarrow.lib.ArrowInvalid: `start`(0) should be greater than 0 and smaller
than `stop`(0)
```
After:
```
<pyarrow.lib.ListArray object at 0x1a01b8b20>
[
[]
]
```
* GitHub Issue: #33459
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: AlenkaF <[email protected]>
---
cpp/src/arrow/compute/kernels/scalar_nested.cc | 16 ++++++++------
.../arrow/compute/kernels/scalar_nested_test.cc | 25 +++++++++++-----------
python/pyarrow/tests/test_compute.py | 14 ++++++------
3 files changed, 29 insertions(+), 26 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested.cc
b/cpp/src/arrow/compute/kernels/scalar_nested.cc
index 1fb0df56bb..e9c65aff1c 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested.cc
@@ -162,7 +162,8 @@ Result<TypeHolder> ListSliceOutputType(const
ListSliceOptions& opts,
"`stop` being set.");
}
if (opts.step < 1) {
- return Status::Invalid("`step` must be >= 1, got: ", opts.step);
+ return Status::Invalid("`step` must be greater than or equal to 1, got:
",
+ opts.step);
}
const auto length = ListSliceLength(opts.start, opts.step, *stop);
return fixed_size_list(value_type, static_cast<int32_t>(length));
@@ -183,14 +184,15 @@ struct ListSlice {
const auto* list_type = checked_cast<const BaseListType*>(list_array.type);
// Pre-conditions
- if (opts.start < 0 || (opts.stop.has_value() && opts.start >=
opts.stop.value())) {
- // TODO(ARROW-18281): support start == stop which should give empty lists
- return Status::Invalid("`start`(", opts.start,
- ") should be greater than 0 and smaller than
`stop`(",
- ToString(opts.stop), ")");
+ if (opts.start < 0 || (opts.stop.has_value() && opts.start >
opts.stop.value())) {
+ return Status::Invalid(
+ "`start`(", opts.start,
+ ") should be greater than or equal to 0 and not greater than
`stop`(",
+ ToString(opts.stop), ")");
}
if (opts.step < 1) {
- return Status::Invalid("`step` must be >= 1, got: ", opts.step);
+ return Status::Invalid("`step` must be greater than or equal to 1, got:
",
+ opts.step);
}
auto* pool = ctx->memory_pool();
diff --git a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
index f199f56aa2..b5a68d12cb 100644
--- a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc
@@ -176,6 +176,12 @@ TEST(TestScalarNested, ListSliceVariableOutput) {
auto input = ArrayFromJSON(fixed_size_list(int32(), 1), "[[1]]");
auto expected = ArrayFromJSON(list(int32()), "[[1]]");
CheckScalarUnary("list_slice", input, expected, &args);
+
+ args.start = 0;
+ args.stop = 0;
+ auto input_empty = ArrayFromJSON(list(int32()), "[[1, 2, 3], [4, 5], null]");
+ auto expected_empty = ArrayFromJSON(list(int32()), "[[], [], null]");
+ CheckScalarUnary("list_slice", input_empty, expected_empty, &args);
}
TEST(TestScalarNested, ListSliceFixedOutput) {
@@ -315,7 +321,8 @@ TEST(TestScalarNested, ListSliceBadParameters) {
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid,
::testing::HasSubstr(
- "`start`(-1) should be greater than 0 and smaller than `stop`(1)"),
+ "`start`(-1) should be greater than or equal to 0 and not greater
than "
+ "`stop`(1)"),
CallFunction("list_slice", {input}, &args));
// start greater than stop
args.start = 1;
@@ -323,14 +330,8 @@ TEST(TestScalarNested, ListSliceBadParameters) {
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid,
::testing::HasSubstr(
- "`start`(1) should be greater than 0 and smaller than `stop`(0)"),
- CallFunction("list_slice", {input}, &args));
- // start same as stop
- args.stop = args.start;
- EXPECT_RAISES_WITH_MESSAGE_THAT(
- Invalid,
- ::testing::HasSubstr(
- "`start`(1) should be greater than 0 and smaller than `stop`(1)"),
+ "`start`(1) should be greater than or equal to 0 and not greater
than "
+ "`stop`(0)"),
CallFunction("list_slice", {input}, &args));
// stop not set and FixedSizeList requested with variable sized input
args.stop = std::nullopt;
@@ -343,9 +344,9 @@ TEST(TestScalarNested, ListSliceBadParameters) {
args.start = 0;
args.stop = 2;
args.step = 0;
- EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid,
- ::testing::HasSubstr("`step` must be >= 1,
got: 0"),
- CallFunction("list_slice", {input}, &args));
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ Invalid, ::testing::HasSubstr("`step` must be greater than or equal to
1, got: 0"),
+ CallFunction("list_slice", {input}, &args));
}
TEST(TestScalarNested, StructField) {
diff --git a/python/pyarrow/tests/test_compute.py
b/python/pyarrow/tests/test_compute.py
index d8a1c4d093..2ef14ff39b 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -3930,7 +3930,8 @@ def test_list_slice_output_fixed(start, stop, step,
expected, value_type,
(0, 1,),
(0, 2,),
(1, 2,),
- (2, 4,)
+ (2, 4,),
+ (0, 0,)
))
@pytest.mark.parametrize("step", (1, 2))
@pytest.mark.parametrize("value_type", (pa.string, pa.int16, pa.float64))
@@ -3978,18 +3979,17 @@ def
test_list_slice_field_names_retained(return_fixed_size, type):
def test_list_slice_bad_parameters():
arr = pa.array([[1]], pa.list_(pa.int8(), 1))
- msg = r"`start`(.*) should be greater than 0 and smaller than `stop`(.*)"
+ msg = (
+ r"`start`(.*) should be greater than or equal to 0 "
+ r"and not greater than `stop`(.*)"
+ )
with pytest.raises(pa.ArrowInvalid, match=msg):
pc.list_slice(arr, -1, 1) # negative start?
with pytest.raises(pa.ArrowInvalid, match=msg):
pc.list_slice(arr, 2, 1) # start > stop?
- # TODO(ARROW-18281): start==stop -> empty lists
- with pytest.raises(pa.ArrowInvalid, match=msg):
- pc.list_slice(arr, 0, 0) # start == stop?
-
# Step not >= 1
- msg = "`step` must be >= 1, got: "
+ msg = "`step` must be greater than or equal to 1, got: "
with pytest.raises(pa.ArrowInvalid, match=msg + "0"):
pc.list_slice(arr, 0, 1, step=0)
with pytest.raises(pa.ArrowInvalid, match=msg + "-1"):