This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 0432aabd59d branch-4.0: [typo](tvf & decompression) add case "lz4" ->
"lz4frame" #56316 (#56593)
0432aabd59d is described below
commit 0432aabd59d8dad535ab9c22aefe2ab2bc6b6887
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Sep 29 09:55:48 2025 +0800
branch-4.0: [typo](tvf & decompression) add case "lz4" -> "lz4frame" #56316
(#56593)
Cherry-picked from #56316
Co-authored-by: Refrain <[email protected]>
---
be/src/util/load_util.cpp | 4 +-
.../java/org/apache/doris/common/util/Util.java | 4 +
.../data/load_p0/stream_load/test_compress.csv.lz4 | Bin 48 -> 80 bytes
.../data/load_p0/tvf/test_tvf_lz4_compress.out | 21 ++
.../load_p0/tvf/test_tvf_lz4_compress.groovy | 233 +++++++++++++++++++++
5 files changed, 260 insertions(+), 2 deletions(-)
diff --git a/be/src/util/load_util.cpp b/be/src/util/load_util.cpp
index da60e4ce725..ab6e3e887f8 100644
--- a/be/src/util/load_util.cpp
+++ b/be/src/util/load_util.cpp
@@ -37,7 +37,7 @@ void LoadUtil::parse_format(const std::string& format_str, const std::string& co
*compress_type = TFileCompressType::LZO;
} else if (iequal(compress_type_str, "BZ2")) {
*compress_type = TFileCompressType::BZ2;
- } else if (iequal(compress_type_str, "LZ4")) {
+ } else if (iequal(compress_type_str, "LZ4") || iequal(compress_type_str, "LZ4FRAME")) {
*compress_type = TFileCompressType::LZ4FRAME;
} else if (iequal(compress_type_str, "LZ4_BLOCK")) {
*compress_type = TFileCompressType::LZ4BLOCK;
@@ -62,7 +62,7 @@ void LoadUtil::parse_format(const std::string& format_str, const std::string& co
*format_type = TFileFormatType::FORMAT_CSV_LZO;
} else if (iequal(compress_type_str, "BZ2")) {
*format_type = TFileFormatType::FORMAT_CSV_BZ2;
- } else if (iequal(compress_type_str, "LZ4")) {
+ } else if (iequal(compress_type_str, "LZ4") || iequal(compress_type_str, "LZ4FRAME")) {
*format_type = TFileFormatType::FORMAT_CSV_LZ4FRAME;
} else if (iequal(compress_type_str, "LZ4_BLOCK")) {
*format_type = TFileFormatType::FORMAT_CSV_LZ4BLOCK;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
index 2331070dbe4..dcde71a7a6a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java
@@ -616,6 +616,10 @@ public class Util {
}
final String upperCaseType = compressType.toUpperCase();
try {
+ // for compatibility, convert lz4 to lz4frame
+ if (upperCaseType.equals("LZ4")) {
+ return TFileCompressType.LZ4FRAME;
+ }
return TFileCompressType.valueOf(upperCaseType);
} catch (IllegalArgumentException e) {
throw new AnalysisException("Unknown compression type: " + compressType);
diff --git a/regression-test/data/load_p0/stream_load/test_compress.csv.lz4 b/regression-test/data/load_p0/stream_load/test_compress.csv.lz4
index 76955306d8a..ac4369beb9f 100644
Binary files a/regression-test/data/load_p0/stream_load/test_compress.csv.lz4 and b/regression-test/data/load_p0/stream_load/test_compress.csv.lz4 differ
diff --git a/regression-test/data/load_p0/tvf/test_tvf_lz4_compress.out b/regression-test/data/load_p0/tvf/test_tvf_lz4_compress.out
new file mode 100644
index 00000000000..0eca01b393d
--- /dev/null
+++ b/regression-test/data/load_p0/tvf/test_tvf_lz4_compress.out
@@ -0,0 +1,21 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !tvf_lz4 --
+10
+
+-- !tvf_lz4_data --
+1 2
+3 4
+5 6
+7 8
+9 10
+
+-- !tvf_lz4frame --
+10
+
+-- !tvf_lz4frame_data --
+1 2
+3 4
+5 6
+7 8
+9 10
+
diff --git a/regression-test/suites/load_p0/tvf/test_tvf_lz4_compress.groovy b/regression-test/suites/load_p0/tvf/test_tvf_lz4_compress.groovy
new file mode 100644
index 00000000000..805607d79cb
--- /dev/null
+++ b/regression-test/suites/load_p0/tvf/test_tvf_lz4_compress.groovy
@@ -0,0 +1,233 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_tvf_lz4_compress") {
+ def s3BucketName = getS3BucketName()
+ def s3Endpoint = getS3Endpoint()
+ def s3Region = getS3Region()
+ def ak = getS3AK()
+ def sk = getS3SK()
+
+ // stream load test 'lz4' and 'lz4frame'
+ /* test_compress.csv.lz4
+ 1,2
+ 3,4
+ 5,6
+ 7,8
+ 9,10
+ 11,12
+ 13,14
+ 15,16
+ 17,18
+ 19,20
+ */
+ try {
+ sql """
+ CREATE TABLE IF NOT EXISTS test_table1 (
+ a INT,
+ b INT
+ ) ENGINE=OLAP
+ DUPLICATE KEY(a)
+ DISTRIBUTED BY RANDOM BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ // stream load use 'lz4'
+ streamLoad {
+ table "test_table1"
+ set 'format', 'csv'
+ set 'column_separator', ','
+ set 'columns', 'a,b'
+ set 'compress_type', 'lz4'
+ file '../stream_load/test_compress.csv.lz4'
+ time 10000
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(json.NumberLoadedRows, 10)
+ }
+ }
+ sql """ truncate table test_table1; """
+ // stream load use 'lz4frame'
+ streamLoad {
+ table "test_table1"
+ set 'format', 'csv'
+ set 'column_separator', ','
+ set 'columns', 'a,b'
+ set 'compress_type', 'lz4frame'
+ file '../stream_load/test_compress.csv.lz4'
+ time 10000
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(json.NumberLoadedRows, 10)
+ }
+ }
+ } finally {
+ try_sql("DROP TABLE IF EXISTS test_table1")
+ }
+
+ // with S3 load test
+ try {
+ sql """
+ CREATE TABLE IF NOT EXISTS test_table2 (
+ a INT,
+ b INT
+ ) ENGINE=OLAP
+ DUPLICATE KEY(a)
+ DISTRIBUTED BY RANDOM BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ // S3 load use 'lz4'
+ def label1 = "test_s3_load_lz4_" + System.currentTimeMillis()
+ sql """
+ LOAD LABEL ${label1} (
+ DATA INFILE("s3://${s3BucketName}/load/tvf_compress.csv.lz4")
+ INTO TABLE test_table2
+ COLUMNS TERMINATED BY ","
+ FORMAT AS "csv"
+ (a, b)
+ )
+ WITH S3 (
+ "AWS_ACCESS_KEY" = "${ak}",
+ "AWS_SECRET_KEY" = "${sk}",
+ "AWS_ENDPOINT" = "${s3Endpoint}",
+ "AWS_REGION" = "${s3Region}",
+ "compress_type" = "lz4"
+ )
+ """
+
+ def max_try_milli_secs = 60000
+ while (max_try_milli_secs > 0) {
+ def count = sql """ select * from test_table2; """
+ if (count.size() == 10) {
+ break
+ }
+ Thread.sleep(1000)
+ max_try_milli_secs -= 1000
+ if (max_try_milli_secs <= 0) {
+ assertTrue(false, "S3 load timeout: ${label1}")
+ }
+ }
+
+ sql """ truncate table test_table2; """
+
+ // S3 load use 'lz4frame'
+ def label2 = "test_s3_load_lz4frame_" + System.currentTimeMillis()
+ sql """
+ LOAD LABEL ${label2} (
+ DATA INFILE("s3://${s3BucketName}/load/tvf_compress.csv.lz4")
+ INTO TABLE test_table2
+ COLUMNS TERMINATED BY ","
+ FORMAT AS "csv"
+ (a, b)
+ )
+ WITH S3 (
+ "AWS_ACCESS_KEY" = "${ak}",
+ "AWS_SECRET_KEY" = "${sk}",
+ "AWS_ENDPOINT" = "${s3Endpoint}",
+ "AWS_REGION" = "${s3Region}",
+ "compress_type" = "lz4frame"
+ )
+ """
+
+ while (max_try_milli_secs > 0) {
+ def count = sql """ select * from test_table2; """
+ if (count.size() == 10) {
+ break
+ }
+ Thread.sleep(1000)
+ max_try_milli_secs -= 1000
+ if (max_try_milli_secs <= 0) {
+ assertTrue(false, "S3 load timeout: ${label2}")
+ }
+ }
+ } finally {
+ try_sql("DROP TABLE IF EXISTS test_table2")
+ }
+
+ // tvf s3 load test
+ try {
+ sql """
+ CREATE TABLE IF NOT EXISTS test_table3 (
+ a INT,
+ b INT
+ ) ENGINE=OLAP
+ DUPLICATE KEY(a)
+ DISTRIBUTED BY RANDOM BUCKETS 10
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ // TVF S3 load use 'lz4'
+ sql """
+ INSERT INTO test_table3
+ SELECT CAST(split_part(c1, ',', 1) AS INT) AS a, CAST(split_part(c1, ',', 2) AS INT) AS b FROM S3 (
+ "uri" = "s3://${s3BucketName}/load/tvf_compress.csv.lz4",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "s3.endpoint" = "${s3Endpoint}",
+ "s3.region" = "${s3Region}",
+ "format" = "csv",
+ "compress_type" = "lz4"
+ )
+ """
+
+ qt_tvf_lz4 """ SELECT count(*) FROM test_table3; """
+ qt_tvf_lz4_data """ SELECT * FROM test_table3 ORDER BY a LIMIT 5; """
+
+ sql """ truncate table test_table3; """
+
+ // TVF S3 load use 'lz4frame'
+ sql """
+ INSERT INTO test_table3
+ SELECT CAST(split_part(c1, ',', 1) AS INT) AS a, CAST(split_part(c1, ',', 2) AS INT) AS b FROM S3 (
+ "uri" = "s3://${s3BucketName}/load/tvf_compress.csv.lz4",
+ "s3.access_key" = "${ak}",
+ "s3.secret_key" = "${sk}",
+ "s3.endpoint" = "${s3Endpoint}",
+ "s3.region" = "${s3Region}",
+ "format" = "csv",
+ "compress_type" = "lz4frame"
+ )
+ """
+
+ qt_tvf_lz4frame """ SELECT count(*) FROM test_table3; """
+ qt_tvf_lz4frame_data """ SELECT * FROM test_table3 ORDER BY a LIMIT 5; """
+
+ } finally {
+ try_sql("DROP TABLE IF EXISTS test_table3")
+ }
+
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]