This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6a0a905fcfba [SPARK-55739][SQL] Optimize 
`OnHeapColumnVector.putIntsLittleEndian/putLongsLittleEndian` using 
`Platform.copyMemory` on little-endian platforms
6a0a905fcfba is described below

commit 6a0a905fcfbae242b67a6a14bea4535da53bf89b
Author: yangjie01 <[email protected]>
AuthorDate: Fri Feb 27 08:35:54 2026 -0800

    [SPARK-55739][SQL] Optimize 
`OnHeapColumnVector.putIntsLittleEndian/putLongsLittleEndian` using 
`Platform.copyMemory` on little-endian platforms
    
    ### What changes were proposed in this pull request?
    This PR refactors `putIntsLittleEndian` and `putLongsLittleEndian` in 
`OnHeapColumnVector` to hoist the `bigEndianPlatform` check out of the loop 
and use `Platform.copyMemory` for the common little-endian path.
    
    ### Why are the changes needed?
    On little-endian platforms, the little-endian input bytes can be copied 
directly with `Platform.copyMemory`, a memory copying method optimized by 
Spark, instead of being read one element at a time.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    - Pass GitHub Actions
    - Rename the original code to `OldOnHeapColumnVector`, and compare the 
latency of the old and new `putIntsLittleEndian` and `putLongsLittleEndian` 
methods using JMH:
    
    <details>
    <summary><b>Benchmark Code (click to expand)</b></summary>
    
    ```java
    package org.apache.spark.sql.execution.vectorized;
    
    import java.util.concurrent.TimeUnit;
    
    import org.openjdk.jmh.annotations.*;
    import org.openjdk.jmh.runner.Runner;
    import org.openjdk.jmh.runner.RunnerException;
    import org.openjdk.jmh.runner.options.Options;
    import org.openjdk.jmh.runner.options.OptionsBuilder;
    
    import org.apache.spark.sql.types.DataTypes;
    
    BenchmarkMode(Mode.AverageTime)
    OutputTimeUnit(TimeUnit.MICROSECONDS)
    State(Scope.Thread)
    Fork(value = 1, jvmArgs = {"-Xms4G", "-Xmx4G"})
    Warmup(iterations = 10, time = 1)
    Measurement(iterations = 10, time = 1)
    public class OnHeapColumnVectorJMHBenchmark {
    
      Param({"512", "1024", "4096", "8192", "16384"})
      public int count;
    
      Param({"65536"})
      public int i;
    
      private OnHeapColumnVector onHeapVectorInt;
      private OnHeapColumnVector onHeapVectorLong;
      private OldOnHeapColumnVector oldOnHeapVectorInt;
      private OldOnHeapColumnVector oldOnHeapVectorLong;
    
      private byte[] inputBytesInt;
      private byte[] inputBytesLong;
    
      Setup
      public void setup() {
        onHeapVectorInt = new OnHeapColumnVector(count, DataTypes.IntegerType);
        onHeapVectorLong = new OnHeapColumnVector(count, DataTypes.LongType);
        oldOnHeapVectorInt = new OldOnHeapColumnVector(count, 
DataTypes.IntegerType);
        oldOnHeapVectorLong = new OldOnHeapColumnVector(count, 
DataTypes.LongType);
    
        inputBytesInt = new byte[count * 4];
        new java.util.Random().nextBytes(inputBytesInt);
        inputBytesLong = new byte[count * 8];
        new java.util.Random().nextBytes(inputBytesLong);
      }
    
      TearDown
      public void tearDown() {
        onHeapVectorInt.close();
        onHeapVectorLong.close();
        oldOnHeapVectorInt.close();
        oldOnHeapVectorLong.close();
      }
    
      Benchmark
      public void onHeapPutIntsLittleEndian() {
        for (int n = 0; n < i; n++) {
          onHeapVectorInt.putIntsLittleEndian(0, count, inputBytesInt, 0);
        }
      }
    
      Benchmark
      public void OnHeapPutIntsLittleEndian_old() {
        for (int n = 0; n < i; n++) {
          oldOnHeapVectorInt.putIntsLittleEndian(0, count, inputBytesInt, 0);
        }
      }
    
      Benchmark
      public void onHeapPutLongsLittleEndian() {
        for (int n = 0; n < i; n++) {
          onHeapVectorLong.putLongsLittleEndian(0, count, inputBytesLong, 0);
        }
      }
    
      Benchmark
      public void OnHeapPutLongsLittleEndian_old() {
        for (int n = 0; n < i; n++) {
          oldOnHeapVectorLong.putLongsLittleEndian(0, count, inputBytesLong, 0);
        }
      }
    
        public static void main(String[] args) throws RunnerException {
            String filter = args.length > 0 ?
                    args[0] : 
OnHeapColumnVectorJMHBenchmark.class.getSimpleName();
            Options opt = new OptionsBuilder()
                    .include(filter)
                    .build();
    
            new Runner(opt).run();
        }
    }
    ```
    </details>
    
    **Benchmark results:**
    
    - Java 17.0.18+8-LTS
    
    ```
    Benchmark                                                      (count) 
(loop)  Mode  Cnt       Score      Error  Units
    OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old       512  
65536  avgt   10   11743.097 ±   53.078  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian           512  
65536  avgt   10    1703.095 ±    6.250  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old      1024  
65536  avgt   10   23448.338 ±  303.412  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian          1024  
65536  avgt   10    3008.894 ±    6.781  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old      4096  
65536  avgt   10   91491.559 ±  346.421  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian          4096  
65536  avgt   10   11303.794 ±   22.716  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old      8192  
65536  avgt   10  189572.012 ± 1575.984  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian          8192  
65536  avgt   10   42395.515 ±  353.775  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old     16384  
65536  avgt   10  379232.070 ± 4484.971  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian         16384  
65536  avgt   10   85881.927 ±  271.668  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old      512  
65536  avgt   10   12195.436 ±  104.812  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian          512  
65536  avgt   10    3849.975 ±    5.037  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old     1024  
65536  avgt   10   24296.856 ±  194.031  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian         1024  
65536  avgt   10    7436.610 ±  212.457  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old     4096  
65536  avgt   10   95374.778 ± 1560.388  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian         4096  
65536  avgt   10   44003.750 ±  599.336  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old     8192  
65536  avgt   10  189202.921 ±  322.925  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian         8192  
65536  avgt   10   88005.115 ±   60.030  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old    16384  
65536  avgt   10  379306.120 ± 4696.742  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian        16384  
65536  avgt   10  186179.355 ±  348.975  us/op
    ```
    
    - Java 21.0.10+7-LTS
    
    ```
    Benchmark                                                      (count) 
(loop)  Mode  Cnt       Score      Error  Units
    OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old       512  
65536  avgt   10    1790.974 ±   11.692  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian           512  
65536  avgt   10    1848.389 ±    5.441  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old      1024  
65536  avgt   10    3023.715 ±   17.073  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian          1024  
65536  avgt   10    3113.747 ±    4.668  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old      4096  
65536  avgt   10   11076.221 ±   60.823  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian          4096  
65536  avgt   10   11180.941 ±   31.083  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old      8192  
65536  avgt   10   43625.483 ±   67.768  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian          8192  
65536  avgt   10   43086.341 ±   65.125  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old     16384  
65536  avgt   10   89393.103 ±  547.105  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian         16384  
65536  avgt   10   90173.425 ±  112.846  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old      512  
65536  avgt   10    3028.893 ±   91.114  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian          512  
65536  avgt   10    3068.886 ±   16.652  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old     1024  
65536  avgt   10    5961.539 ±   13.220  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian         1024  
65536  avgt   10    5902.645 ±   14.256  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old     4096  
65536  avgt   10   42444.759 ±   64.922  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian         4096  
65536  avgt   10   42379.760 ±   63.047  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old     8192  
65536  avgt   10   85712.614 ±  301.436  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian         8192  
65536  avgt   10   85106.127 ±   45.659  us/op
    
    OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old    16384  
65536  avgt   10  170694.785 ± 1030.468  us/op
    OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian        16384  
65536  avgt   10  170435.863 ±  230.682  us/op
    ```
    
    Based on the test results, the new code performs noticeably better on 
Java 17. In a typical 4096 scenario, it achieves an **8-fold** 
performance improvement for the int type and a **2-fold** performance 
improvement for the long type. On Java 21, due to its more aggressive 
auto-vectorization strategy, the new code does not demonstrate a 
distinct advantage, yet it also does not show a noticeable decline in 
performance. We can wait until Java 21 becomes the de [...]
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #54532 from LuciferYang/SPARK-55739.
    
    Authored-by: yangjie01 <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../execution/vectorized/OnHeapColumnVector.java   | 24 +++++++++++++---------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git 
a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
 
b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
index 0854c42db672..a6472955d673 100644
--- 
a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
+++ 
b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
@@ -332,12 +332,14 @@ public final class OnHeapColumnVector extends 
WritableColumnVector {
 
   @Override
   public void putIntsLittleEndian(int rowId, int count, byte[] src, int 
srcIndex) {
-    int srcOffset = srcIndex + Platform.BYTE_ARRAY_OFFSET;
-    for (int i = 0; i < count; ++i, srcOffset += 4) {
-      intData[i + rowId] = Platform.getInt(src, srcOffset);
-      if (bigEndianPlatform) {
-        intData[i + rowId] = java.lang.Integer.reverseBytes(intData[i + 
rowId]);
+    if (bigEndianPlatform) {
+      int srcOffset = srcIndex + Platform.BYTE_ARRAY_OFFSET;
+      for (int i = 0; i < count; ++i, srcOffset += 4) {
+        intData[i + rowId] = 
java.lang.Integer.reverseBytes(Platform.getInt(src, srcOffset));
       }
+    } else {
+      Platform.copyMemory(src, Platform.BYTE_ARRAY_OFFSET + srcIndex, intData,
+        Platform.INT_ARRAY_OFFSET + rowId * 4L, count * 4L);
     }
   }
 
@@ -406,12 +408,14 @@ public final class OnHeapColumnVector extends 
WritableColumnVector {
 
   @Override
   public void putLongsLittleEndian(int rowId, int count, byte[] src, int 
srcIndex) {
-    int srcOffset = srcIndex + Platform.BYTE_ARRAY_OFFSET;
-    for (int i = 0; i < count; ++i, srcOffset += 8) {
-      longData[i + rowId] = Platform.getLong(src, srcOffset);
-      if (bigEndianPlatform) {
-        longData[i + rowId] = java.lang.Long.reverseBytes(longData[i + rowId]);
+    if (bigEndianPlatform) {
+      int srcOffset = srcIndex + Platform.BYTE_ARRAY_OFFSET;
+      for (int i = 0; i < count; ++i, srcOffset += 8) {
+        longData[i + rowId] = 
java.lang.Long.reverseBytes(Platform.getLong(src, srcOffset));
       }
+    } else {
+      Platform.copyMemory(src, Platform.BYTE_ARRAY_OFFSET + srcIndex, longData,
+        Platform.LONG_ARRAY_OFFSET + rowId * 8L, count * 8L);
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to