This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 0432aabd59d branch-4.0: [typo](tvf & decompression) add case "lz4" ->
"lz4frame" #56316 (#56593)
0432aabd59d is described below
commit 0432aabd59d8dad535ab9c22aefe2ab2bc6b6887
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Sep 29 09:55:48 2025 +0800
branch-4.0: [typo](tvf & decompression) add case "lz4" -> "lz4frame" #56316
(#56593)
Cherry-picked from #56316
Co-authored-by: Refrain <[email protected]>
---
be/src/util/load_util.cpp | 4 +-
.../java/org/apache/doris/common/util/Util.java | 4 +
.../data/load_p0/stream_load/test_compress.csv.lz4 | Bin 48 -> 80 bytes
.../data/load_p0/tvf/test_tvf_lz4_compress.out | 21 ++
.../load_p0/tvf/test_tvf_lz4_compress.groovy | 233 +++++++++++++++++++++
5 files changed, 260 insertions(+), 2 deletions(-)
diff --git a/be/src/util/load_util.cpp b/be/src/util/load_util.cpp
index da60e4ce725..ab6e3e887f8 100644
--- a/be/src/util/load_util.cpp
+++ b/be/src/util/load_util.cpp
@@ -37,7 +37,7 @@ void LoadUtil::parse_format(const std::string& format_str, const std::string& co
*compress_type = TFileCompressType::LZO;
} else if (iequal(compress_type_str, "BZ2")) {
*compress_type = TFileCompressType::BZ2;
- } else if (iequal(compress_type_str, "LZ4")) {
+ } else if (iequal(compress_type_str, "LZ4") || iequal(compress_type_str, "LZ4FRAME")) {
*compress_type = TFileCompressType::LZ4FRAME;
} else if (iequal(compress_type_str, "LZ4_BLOCK")) {
*compress_type = TFileCompressType::LZ4BLOCK;
@@ -62,7 +62,7 @@ void LoadUtil::parse_format(const std::string& format_str, const std::string& co
*format_type = TFileFormatType::FORMAT_CSV_LZO;
} else if (iequal(compress_type_str, "BZ2")) {
*format_type = TFileFormatType::FORMAT_CSV_BZ2;
- } else if (iequal(compress_type_str, "LZ4")) {
+ } else if (iequal(compress_type_str, "LZ4") || iequal(compress_type_str, "LZ4FRAME")) {
*format_type = TFileFormatType::FORMAT_CSV_LZ4FRAME;
} else if (iequal(compress_type_str, "LZ4_BLOCK")) {
*format_type = TFileFormatType::FORMAT_CSV_LZ4BLOCK;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
index 2331070dbe4..dcde71a7a6a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
@@ -616,6 +616,10 @@ public class Util {
}
final String upperCaseType = compressType.toUpperCase();
try {
+ // for compatibility, convert lz4 to lz4frame
+ if (upperCaseType.equals("LZ4")) {
+ return TFileCompressType.LZ4FRAME;
+ }
return TFileCompressType.valueOf(upperCaseType);
} catch (IllegalArgumentException e) {
throw new AnalysisException("Unknown compression type: " + compressType);
diff --git a/regression-test/data/load_p0/stream_load/test_compress.csv.lz4 b/regression-test/data/load_p0/stream_load/test_compress.csv.lz4
index 76955306d8a..ac4369beb9f 100644
Binary files a/regression-test/data/load_p0/stream_load/test_compress.csv.lz4 and b/regression-test/data/load_p0/stream_load/test_compress.csv.lz4 differ
diff --git a/regression-test/data/load_p0/tvf/test_tvf_lz4_compress.out b/regression-test/data/load_p0/tvf/test_tvf_lz4_compress.out
new file mode 100644
index 00000000000..0eca01b393d
--- /dev/null
+++ b/regression-test/data/load_p0/tvf/test_tvf_lz4_compress.out
@@ -0,0 +1,21 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !tvf_lz4 --
+10
+
+-- !tvf_lz4_data --
+1 2
+3 4
+5 6
+7 8
+9 10
+
+-- !tvf_lz4frame --
+10
+
+-- !tvf_lz4frame_data --
+1 2
+3 4
+5 6
+7 8
+9 10
+
diff --git a/regression-test/suites/load_p0/tvf/test_tvf_lz4_compress.groovy b/regression-test/suites/load_p0/tvf/test_tvf_lz4_compress.groovy
new file mode 100644
index 00000000000..805607d79cb
--- /dev/null
+++ b/regression-test/suites/load_p0/tvf/test_tvf_lz4_compress.groovy
@@ -0,0 +1,233 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_tvf_lz4_compress") {
+ def s3BucketName = getS3BucketName()
+ def s3Endpoint = getS3Endpoint()
+ def s3Region = getS3Region()
+ def ak = getS3AK()
+ def sk = getS3SK()
+
+ // stream load test 'lz4' and 'lz4frame'
+ /* test_compress.csv.lz4
+ 1,2
+ 3,4
+ 5,6
+ 7,8
+ 9,10
+ 11,12
+ 13,14
+ 15,16
+ 17,18
+ 19,20
+ */
+ try {
+ sql """
+ CREATE TABLE IF NOT EXISTS test_table1 (
+ a INT,
+ b INT
+ ) ENGINE=OLAP
+ DUPLICATE KEY(a)
+ DISTRIBUTED BY RANDOM BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ // stream load use 'lz4'
+ streamLoad {
+ table "test_table1"
+ set 'format', 'csv'
+ set 'column_separator', ','
+ set 'columns', 'a,b'
+ set 'compress_type', 'lz4'
+ file '../stream_load/test_compress.csv.lz4'
+ time 10000
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(json.NumberLoadedRows, 10)
+ }
+ }
+ sql """ truncate table test_table1; """
+ // stream load use 'lz4frame'
+ streamLoad {
+ table "test_table1"
+ set 'format', 'csv'
+ set 'column_separator', ','
+ set 'columns', 'a,b'
+ set 'compress_type', 'lz4frame'
+ file '../stream_load/test_compress.csv.lz4'
+ time 10000
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(json.NumberLoadedRows, 10)
+ }
+ }
+ } finally {
+ try_sql("DROP TABLE IF EXISTS test_table1")
+ }
+
+ // with S3 load test
+ try {
+ sql """
+ CREATE TABLE IF NOT EXISTS test_table2 (
+ a INT,
+ b INT
+ ) ENGINE=OLAP
+ DUPLICATE KEY(a)
+ DISTRIBUTED BY RANDOM BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ // S3 load use 'lz4'
+ def label1 = "test_s3_load_lz4_" + System.currentTimeMillis()
+ sql """
+ LOAD LABEL ${label1} (
+ DATA INFILE("s3://${s3BucketName}/load/tvf_compress.csv.lz4")
+ INTO TABLE test_table2
+ COLUMNS TERMINATED BY ","
+ FORMAT AS "csv"
+ (a, b)
+ )
+ WITH S3 (
+ "AWS_ACCESS_KEY" = "${ak}",
+ "AWS_SECRET_KEY" = "${sk}",
+ "AWS_ENDPOINT" = "${s3Endpoint}",
+ "AWS_REGION" = "${s3Region}",
+ "compress_type" = "lz4"
+ )
+ """
+
+ def max_try_milli_secs = 60000
+ while (max_try_milli_secs > 0) {
+ def count = sql """ select * from test_table2; """
+ if (count.size() == 10) {
+ break
+ }
+ Thread.sleep(1000)
+ max_try_milli_secs -= 1000
+ if (max_try_milli_secs <= 0) {
+ assertTrue(false, "S3 load timeout: ${label1}")
+ }
+ }
+
+ sql """ truncate table test_table2; """
+
+ // S3 load use 'lz4frame'
+ def label2 = "test_s3_load_lz4frame_" + System.currentTimeMillis()
+ sql """
+ LOAD LABEL ${label2} (
+ DATA INFILE("s3://${s3BucketName}/load/tvf_compress.csv.lz4")
+ INTO TABLE test_table2
+ COLUMNS TERMINATED BY ","
+ FORMAT AS "csv"
+ (a, b)
+ )
+ WITH S3 (
+ "AWS_ACCESS_KEY" = "${ak}",
+ "AWS_SECRET_KEY" = "${sk}",
+ "AWS_ENDPOINT" = "${s3Endpoint}",
+ "AWS_REGION" = "${s3Region}",
+ "compress_type" = "lz4frame"
+ )
+ """
+
+ while (max_try_milli_secs > 0) {
+ def count = sql """ select * from test_table2; """
+ if (count.size() == 10) {
+ break
+ }
+ Thread.sleep(1000)
+ max_try_milli_secs -= 1000
+ if (max_try_milli_secs <= 0) {
+ assertTrue(false, "S3 load timeout: ${label2}")
+ }
+ }
+ } finally {
+ try_sql("DROP TABLE IF EXISTS test_table2")
+ }
+
+ // tvf s3 load test
+ try {
+ sql """
+ CREATE TABLE IF NOT EXISTS test_table3 (
+ a INT,
+ b INT
+ ) ENGINE=OLAP
+ DUPLICATE KEY(a)
+ DISTRIBUTED BY RANDOM BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ // TVF S3 load use 'lz4'
+ sql """
+ INSERT INTO test_table3
+ SELECT CAST(split_part(c1, ',', 1) AS INT) AS a, CAST(split_part(c1, ',', 2) AS INT) AS b FROM S3 (
+ "uri" = "s3://${s3BucketName}/load/tvf_compress.csv.lz4",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "s3.endpoint" = "${s3Endpoint}",
+ "s3.region" = "${s3Region}",
+ "format" = "csv",
+ "compress_type" = "lz4"
+ )
+ """
+
+ qt_tvf_lz4 """ SELECT count(*) FROM test_table3; """
+ qt_tvf_lz4_data """ SELECT * FROM test_table3 ORDER BY a LIMIT 5; """
+
+ sql """ truncate table test_table3; """
+
+ // TVF S3 load use 'lz4frame'
+ sql """
+ INSERT INTO test_table3
+ SELECT CAST(split_part(c1, ',', 1) AS INT) AS a, CAST(split_part(c1, ',', 2) AS INT) AS b FROM S3 (
+ "uri" = "s3://${s3BucketName}/load/tvf_compress.csv.lz4",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "s3.endpoint" = "${s3Endpoint}",
+ "s3.region" = "${s3Region}",
+ "format" = "csv",
+ "compress_type" = "lz4frame"
+ )
+ """
+
+ qt_tvf_lz4frame """ SELECT count(*) FROM test_table3; """
+ qt_tvf_lz4frame_data """ SELECT * FROM test_table3 ORDER BY a LIMIT 5; """
+
+ } finally {
+ try_sql("DROP TABLE IF EXISTS test_table3")
+ }
+
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]