This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6a0a905fcfba [SPARK-55739][SQL] Optimize
`OnHeapColumnVector.putIntsLittleEndian/putLongsLittleEndian` using
`Platform.copyMemory` on little-endian platforms
6a0a905fcfba is described below
commit 6a0a905fcfbae242b67a6a14bea4535da53bf89b
Author: yangjie01 <[email protected]>
AuthorDate: Fri Feb 27 08:35:54 2026 -0800
[SPARK-55739][SQL] Optimize
`OnHeapColumnVector.putIntsLittleEndian/putLongsLittleEndian` using
`Platform.copyMemory` on little-endian platforms
### What changes were proposed in this pull request?
This PR refactors `putIntsLittleEndian` and `putLongsLittleEndian` in
`OnHeapColumnVector` to hoist the `bigEndianPlatform` check outside the loop
and use `Platform.copyMemory` for the common little-endian path.
### Why are the changes needed?
On little-endian platforms the little-endian input bytes can be copied
directly with `Platform.copyMemory`, Spark's optimized memory-copy method,
instead of being read and stored one element at a time.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- Pass GitHub Actions
- Rename the original code to `OldOnHeapColumnVector`, and compare the
latency of the old and new `putIntsLittleEndian` and `putLongsLittleEndian`
methods using JMH:
<details>
<summary><b>Benchmark Code (click to expand)</b></summary>
```java
package org.apache.spark.sql.execution.vectorized;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.apache.spark.sql.types.DataTypes;
BenchmarkMode(Mode.AverageTime)
OutputTimeUnit(TimeUnit.MICROSECONDS)
State(Scope.Thread)
Fork(value = 1, jvmArgs = {"-Xms4G", "-Xmx4G"})
Warmup(iterations = 10, time = 1)
Measurement(iterations = 10, time = 1)
public class OnHeapColumnVectorJMHBenchmark {
Param({"512", "1024", "4096", "8192", "16384"})
public int count;
Param({"65536"})
public int i;
private OnHeapColumnVector onHeapVectorInt;
private OnHeapColumnVector onHeapVectorLong;
private OldOnHeapColumnVector oldOnHeapVectorInt;
private OldOnHeapColumnVector oldOnHeapVectorLong;
private byte[] inputBytesInt;
private byte[] inputBytesLong;
Setup
public void setup() {
onHeapVectorInt = new OnHeapColumnVector(count, DataTypes.IntegerType);
onHeapVectorLong = new OnHeapColumnVector(count, DataTypes.LongType);
oldOnHeapVectorInt = new OldOnHeapColumnVector(count,
DataTypes.IntegerType);
oldOnHeapVectorLong = new OldOnHeapColumnVector(count,
DataTypes.LongType);
inputBytesInt = new byte[count * 4];
new java.util.Random().nextBytes(inputBytesInt);
inputBytesLong = new byte[count * 8];
new java.util.Random().nextBytes(inputBytesLong);
}
TearDown
public void tearDown() {
onHeapVectorInt.close();
onHeapVectorLong.close();
oldOnHeapVectorInt.close();
oldOnHeapVectorLong.close();
}
Benchmark
public void onHeapPutIntsLittleEndian() {
for (int n = 0; n < i; n++) {
onHeapVectorInt.putIntsLittleEndian(0, count, inputBytesInt, 0);
}
}
Benchmark
public void OnHeapPutIntsLittleEndian_old() {
for (int n = 0; n < i; n++) {
oldOnHeapVectorInt.putIntsLittleEndian(0, count, inputBytesInt, 0);
}
}
Benchmark
public void onHeapPutLongsLittleEndian() {
for (int n = 0; n < i; n++) {
onHeapVectorLong.putLongsLittleEndian(0, count, inputBytesLong, 0);
}
}
Benchmark
public void OnHeapPutLongsLittleEndian_old() {
for (int n = 0; n < i; n++) {
oldOnHeapVectorLong.putLongsLittleEndian(0, count, inputBytesLong, 0);
}
}
public static void main(String[] args) throws RunnerException {
String filter = args.length > 0 ?
args[0] :
OnHeapColumnVectorJMHBenchmark.class.getSimpleName();
Options opt = new OptionsBuilder()
.include(filter)
.build();
new Runner(opt).run();
}
}
```
</details>
**Benchmark results:**
- Java 17.0.18+8-LTS
```
Benchmark (count)
(loop) Mode Cnt Score Error Units
OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old 512
65536 avgt 10 11743.097 ± 53.078 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian 512
65536 avgt 10 1703.095 ± 6.250 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old 1024
65536 avgt 10 23448.338 ± 303.412 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian 1024
65536 avgt 10 3008.894 ± 6.781 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old 4096
65536 avgt 10 91491.559 ± 346.421 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian 4096
65536 avgt 10 11303.794 ± 22.716 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old 8192
65536 avgt 10 189572.012 ± 1575.984 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian 8192
65536 avgt 10 42395.515 ± 353.775 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old 16384
65536 avgt 10 379232.070 ± 4484.971 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian 16384
65536 avgt 10 85881.927 ± 271.668 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old 512
65536 avgt 10 12195.436 ± 104.812 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian 512
65536 avgt 10 3849.975 ± 5.037 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old 1024
65536 avgt 10 24296.856 ± 194.031 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian 1024
65536 avgt 10 7436.610 ± 212.457 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old 4096
65536 avgt 10 95374.778 ± 1560.388 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian 4096
65536 avgt 10 44003.750 ± 599.336 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old 8192
65536 avgt 10 189202.921 ± 322.925 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian 8192
65536 avgt 10 88005.115 ± 60.030 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old 16384
65536 avgt 10 379306.120 ± 4696.742 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian 16384
65536 avgt 10 186179.355 ± 348.975 us/op
```
- Java 21.0.10+7-LTS
```
Benchmark (count)
(loop) Mode Cnt Score Error Units
OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old 512
65536 avgt 10 1790.974 ± 11.692 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian 512
65536 avgt 10 1848.389 ± 5.441 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old 1024
65536 avgt 10 3023.715 ± 17.073 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian 1024
65536 avgt 10 3113.747 ± 4.668 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old 4096
65536 avgt 10 11076.221 ± 60.823 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian 4096
65536 avgt 10 11180.941 ± 31.083 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old 8192
65536 avgt 10 43625.483 ± 67.768 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian 8192
65536 avgt 10 43086.341 ± 65.125 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutIntsLittleEndian_old 16384
65536 avgt 10 89393.103 ± 547.105 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutIntsLittleEndian 16384
65536 avgt 10 90173.425 ± 112.846 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old 512
65536 avgt 10 3028.893 ± 91.114 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian 512
65536 avgt 10 3068.886 ± 16.652 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old 1024
65536 avgt 10 5961.539 ± 13.220 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian 1024
65536 avgt 10 5902.645 ± 14.256 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old 4096
65536 avgt 10 42444.759 ± 64.922 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian 4096
65536 avgt 10 42379.760 ± 63.047 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old 8192
65536 avgt 10 85712.614 ± 301.436 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian 8192
65536 avgt 10 85106.127 ± 45.659 us/op
OnHeapColumnVectorJMHBenchmark.OnHeapPutLongsLittleEndian_old 16384
65536 avgt 10 170694.785 ± 1030.468 us/op
OnHeapColumnVectorJMHBenchmark.onHeapPutLongsLittleEndian 16384
65536 avgt 10 170435.863 ± 230.682 us/op
```
Based on the test results, the new code performs noticeably better on
Java 17. In a typical 4096 scenario, it achieves an **8-fold**
performance improvement for the int type and a **2-fold** performance
improvement for the long type. For Java 21, due to its more aggressive
auto-vectorization optimization strategy, the new code does not demonstrate a
distinct advantage, yet it also does not show a noticeable decline in
performance. We can wait until Java 21 becomes the de [...]
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #54532 from LuciferYang/SPARK-55739.
Authored-by: yangjie01 <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../execution/vectorized/OnHeapColumnVector.java | 24 +++++++++++++---------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git
a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
index 0854c42db672..a6472955d673 100644
---
a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
+++
b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java
@@ -332,12 +332,14 @@ public final class OnHeapColumnVector extends
WritableColumnVector {
@Override
public void putIntsLittleEndian(int rowId, int count, byte[] src, int
srcIndex) {
- int srcOffset = srcIndex + Platform.BYTE_ARRAY_OFFSET;
- for (int i = 0; i < count; ++i, srcOffset += 4) {
- intData[i + rowId] = Platform.getInt(src, srcOffset);
- if (bigEndianPlatform) {
- intData[i + rowId] = java.lang.Integer.reverseBytes(intData[i +
rowId]);
+ if (bigEndianPlatform) {
+ int srcOffset = srcIndex + Platform.BYTE_ARRAY_OFFSET;
+ for (int i = 0; i < count; ++i, srcOffset += 4) {
+ intData[i + rowId] =
java.lang.Integer.reverseBytes(Platform.getInt(src, srcOffset));
}
+ } else {
+ Platform.copyMemory(src, Platform.BYTE_ARRAY_OFFSET + srcIndex, intData,
+ Platform.INT_ARRAY_OFFSET + rowId * 4L, count * 4L);
}
}
@@ -406,12 +408,14 @@ public final class OnHeapColumnVector extends
WritableColumnVector {
@Override
public void putLongsLittleEndian(int rowId, int count, byte[] src, int
srcIndex) {
- int srcOffset = srcIndex + Platform.BYTE_ARRAY_OFFSET;
- for (int i = 0; i < count; ++i, srcOffset += 8) {
- longData[i + rowId] = Platform.getLong(src, srcOffset);
- if (bigEndianPlatform) {
- longData[i + rowId] = java.lang.Long.reverseBytes(longData[i + rowId]);
+ if (bigEndianPlatform) {
+ int srcOffset = srcIndex + Platform.BYTE_ARRAY_OFFSET;
+ for (int i = 0; i < count; ++i, srcOffset += 8) {
+ longData[i + rowId] =
java.lang.Long.reverseBytes(Platform.getLong(src, srcOffset));
}
+ } else {
+ Platform.copyMemory(src, Platform.BYTE_ARRAY_OFFSET + srcIndex, longData,
+ Platform.LONG_ARRAY_OFFSET + rowId * 8L, count * 8L);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]