This is an automated email from the ASF dual-hosted git repository.

gengliangwang pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.x by this push:
     new 3d63f2c43081 [SPARK-57171][SQL] Simplify Slice codegen by extracting 
index arithmetic into a static Java helper
3d63f2c43081 is described below

commit 3d63f2c430812d9553e9fc4ed5bb834eb9d8eff8
Author: Gengliang Wang <[email protected]>
AuthorDate: Mon Jun 1 13:17:19 2026 -0700

    [SPARK-57171][SQL] Simplify Slice codegen by extracting index arithmetic 
into a static Java helper
    
    ### What changes were proposed in this pull request?
    
    Add `ArrayExpressionUtils.sliceStartIndex(int start, int numElements, 
String functionName)` and `ArrayExpressionUtils.sliceLength(int length, int 
numElements, int startIdx, String functionName)`, and route `Slice`'s codegen 
through them.
    
    `Slice.doGenCode` previously emitted ~17 lines of inline, 
element-type-independent index arithmetic (1-based -> 0-based start resolution, 
the `start == 0` / `length < 0` validations, and the result-length clamp). It 
now emits two helper calls. The eval path reuses `sliceStartIndex` for the 
shared start resolution.
    
    Unlike the earlier SPARK-56908 sub-tasks, this is neither ANSI-specific nor 
a try/catch wrapper -- it is a plain, type-independent block of generated 
logic, which is exactly the kind of boilerplate the umbrella aims to 
deduplicate into static Java helpers.
    
    ### Why are the changes needed?
    
    Part of SPARK-56908 (umbrella). Moving the fixed index arithmetic out of 
the generated Java shrinks the per-stage source for every plan that uses 
`slice`, helping with the JVM 64KB method / constant-pool limits, Janino 
compile time, and JIT work.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No. The compiled behavior is identical; only the emitted Java source text 
changes. The codegen path keeps its existing result-length clamp 
(`sliceLength`); the eval path keeps its existing `data.slice(...)` length 
handling unchanged.
    
    ### How was this patch tested?
    
    ```
    build/sbt "catalyst/testOnly *CollectionExpressionsSuite"
    ```
    
    59/59 pass, including `Slice` (exercised both with and without whole-stage 
codegen).
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (Opus 4.8)
    
    Closes #56221 from gengliangwang/spark-slice-codegen.
    
    Authored-by: Gengliang Wang <[email protected]>
    Signed-off-by: Gengliang Wang <[email protected]>
    (cherry picked from commit 27187d6cbae943713be55b7846bfefc2c9c2fad3)
    Signed-off-by: Gengliang Wang <[email protected]>
---
 .../catalyst/expressions/ArrayExpressionUtils.java | 35 ++++++++++++++++++++++
 .../expressions/collectionOperations.scala         | 32 ++++----------------
 2 files changed, 41 insertions(+), 26 deletions(-)

diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ArrayExpressionUtils.java
 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ArrayExpressionUtils.java
index 3d7c5dccc7f2..07a7f98ea1b8 100644
--- 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ArrayExpressionUtils.java
+++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ArrayExpressionUtils.java
@@ -248,4 +248,39 @@ public class ArrayExpressionUtils {
   public static int binarySearch(Object[] data, Object value, 
Comparator<Object> comp) {
     return Arrays.binarySearch(data, value, comp);
   }
+
+  // ----- slice(array, start, length) index resolution -----
+  // Pure 1-based -> 0-based index arithmetic, independent of the array element
+  // type, shared by Slice's eval and codegen paths.
+
+  /**
+   * Resolves the 0-based start index for the 1-based SQL {@code slice(array, start, length)}.
+   * A negative {@code start} counts back from the end; a {@code start} of 0 is rejected. The
+   * result is not range-checked: it is negative or >= numElements when start is out of range.
+   */
+  public static int sliceStartIndex(int start, int numElements, String 
functionName) {
+    if (start == 0) {
+      throw 
QueryExecutionErrors.unexpectedValueForStartInFunctionError(functionName);
+    } else if (start < 0) {
+      return start + numElements;
+    } else {
+      // arrays in SQL are 1-based instead of 0-based
+      return start - 1;
+    }
+  }
+
+  /**
+   * Resolves the result length for {@code slice(array, start, length)}: {@code length} capped
+   * at {@code numElements - startIdx}. A negative {@code length} is rejected. NOTE(review): the
+   * cap is negative if startIdx > numElements; presumably callers range-check startIdx first.
+   */
+  public static int sliceLength(int length, int numElements, int startIdx, 
String functionName) {
+    if (length < 0) {
+      throw 
QueryExecutionErrors.unexpectedValueForLengthInFunctionError(functionName, 
length);
+    } else if (length > numElements - startIdx) {
+      return numElements - startIdx;
+    } else {
+      return length;
+    }
+  }
 }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index 85172f795744..3346f23a70ad 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -2052,13 +2052,7 @@ case class Slice(x: Expression, start: Expression, 
length: Expression)
     val startInt = startVal.asInstanceOf[Int]
     val lengthInt = lengthVal.asInstanceOf[Int]
     val arr = xVal.asInstanceOf[ArrayData]
-    val startIndex = if (startInt == 0) {
-      throw 
QueryExecutionErrors.unexpectedValueForStartInFunctionError(prettyName)
-    } else if (startInt < 0) {
-      startInt + arr.numElements()
-    } else {
-      startInt - 1
-    }
+    val startIndex = ArrayExpressionUtils.sliceStartIndex(startInt, 
arr.numElements(), prettyName)
     if (lengthInt < 0) {
       throw 
QueryExecutionErrors.unexpectedValueForLengthInFunctionError(prettyName, 
lengthInt)
     }
@@ -2075,26 +2069,12 @@ case class Slice(x: Expression, start: Expression, 
length: Expression)
     nullSafeCodeGen(ctx, ev, (x, start, length) => {
       val startIdx = ctx.freshName("startIdx")
       val resLength = ctx.freshName("resLength")
-      val defaultIntValue = CodeGenerator.defaultValue(CodeGenerator.JAVA_INT, 
false)
+      val utils = classOf[ArrayExpressionUtils].getName
       s"""
-         |${CodeGenerator.JAVA_INT} $startIdx = $defaultIntValue;
-         |${CodeGenerator.JAVA_INT} $resLength = $defaultIntValue;
-         |if ($start == 0) {
-         |  throw 
QueryExecutionErrors.unexpectedValueForStartInFunctionError("$prettyName");
-         |} else if ($start < 0) {
-         |  $startIdx = $start + $x.numElements();
-         |} else {
-         |  // arrays in SQL are 1-based instead of 0-based
-         |  $startIdx = $start - 1;
-         |}
-         |if ($length < 0) {
-         |  throw QueryExecutionErrors.unexpectedValueForLengthInFunctionError(
-         |    "$prettyName", $length);
-         |} else if ($length > $x.numElements() - $startIdx) {
-         |  $resLength = $x.numElements() - $startIdx;
-         |} else {
-         |  $resLength = $length;
-         |}
+         |${CodeGenerator.JAVA_INT} $startIdx =
+         |  $utils.sliceStartIndex($start, $x.numElements(), "$prettyName");
+         |${CodeGenerator.JAVA_INT} $resLength =
+         |  $utils.sliceLength($length, $x.numElements(), $startIdx, 
"$prettyName");
          |${genCodeForResult(ctx, ev, x, startIdx, resLength)}
        """.stripMargin
     })


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to