This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new b210d1d338e branch-3.1: [fix](iceberg) fix insert iceberg transform partition table problems and add some cases #54557 (#54723)
b210d1d338e is described below
commit b210d1d338efdea2a76bf4f05e1cbe7917c841d0
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Aug 15 17:39:27 2025 +0800
branch-3.1: [fix](iceberg) fix insert iceberg transform partition table problems and add some cases #54557 (#54723)
Cherry-picked from #54557
Co-authored-by: Socrates <[email protected]>
---
.../sink/writer/iceberg/partition_transformers.h | 15 +-
.../create_preinstalled_scripts/iceberg/run19.sql | 365 +++++++++++++++++++++
.../datasource/iceberg/source/IcebergScanNode.java | 3 +
..._runtime_filter_partition_pruning_transform.out | Bin 0 -> 1319 bytes
.../iceberg/test_iceberg_transform_partitions.out | Bin 0 -> 1619 bytes
...ntime_filter_partition_pruning_transform.groovy | 215 ++++++++++++
.../test_iceberg_transform_partitions.groovy | 205 ++++++++++++
.../test_iceberg_write_transform_partitions.groovy | 70 ++++
8 files changed, 864 insertions(+), 9 deletions(-)
diff --git a/be/src/vec/sink/writer/iceberg/partition_transformers.h b/be/src/vec/sink/writer/iceberg/partition_transformers.h
index 84ee3029cdd..9ceb21e0c5f 100644
--- a/be/src/vec/sink/writer/iceberg/partition_transformers.h
+++ b/be/src/vec/sink/writer/iceberg/partition_transformers.h
@@ -347,11 +347,10 @@ public:
         auto col_res = ColumnDecimal<T>::create(vec_src.size(),
                                                 decimal_col->get_scale());
         auto& vec_res = col_res->get_data();
-        const typename T::NativeType* __restrict p_in =
-                reinterpret_cast<const T::NativeType*>(vec_src.data());
-        const typename T::NativeType* end_in =
+        const auto* __restrict p_in = reinterpret_cast<const T::NativeType*>(vec_src.data());
+        const auto* end_in =
                 reinterpret_cast<const T::NativeType*>(vec_src.data()) + vec_src.size();
-        typename T::NativeType* __restrict p_out = reinterpret_cast<T::NativeType*>(vec_res.data());
+        auto* __restrict p_out = reinterpret_cast<T::NativeType*>(vec_res.data());
         while (p_in < end_in) {
             typename T::NativeType remainder = ((*p_in % _width) + _width) %
                                                _width;
@@ -535,13 +534,11 @@ public:
         auto col_res = ColumnInt32::create();
         ColumnInt32::Container& out_data = col_res->get_data();
         out_data.resize(in_data.size());
-        auto& vec_res = col_res->get_data();
-        const typename T::NativeType* __restrict p_in =
-                reinterpret_cast<const T::NativeType*>(in_data.data());
-        const typename T::NativeType* end_in =
+        const auto* __restrict p_in = reinterpret_cast<const T::NativeType*>(in_data.data());
+        const auto* end_in =
                 reinterpret_cast<const T::NativeType*>(in_data.data()) + in_data.size();
-        typename T::NativeType* __restrict p_out = reinterpret_cast<T::NativeType*>(vec_res.data());
+        Int32* __restrict p_out = out_data.data();
         while (p_in < end_in) {
             std::string buffer = BitUtil::IntToByteBuffer(*p_in);
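
The behavior these hunks preserve is the Iceberg truncate transform for signed numeric types: truncate(W, v) = v - (((v % W) + W) % W), i.e. the largest multiple of W that is <= v. The double modulo matters because C++ '%' truncates toward zero, so v % W can be negative while the spec requires flooring toward negative infinity. A minimal standalone C++ sketch of that remainder logic (an illustration only, not the Doris code; names are made up):

#include <cassert>
#include <cstdint>

// truncate(w, v) per the Iceberg spec: floor v to the nearest lower multiple of w.
int64_t iceberg_truncate(int64_t v, int64_t w) {
    // ((v % w) + w) % w normalizes the remainder into [0, w), even for v < 0.
    int64_t remainder = ((v % w) + w) % w;
    return v - remainder;
}

int main() {
    assert(iceberg_truncate(19, 10) == 10);
    assert(iceberg_truncate(-1, 10) == -10);  // naive v - (v % w) would give 0
    assert(iceberg_truncate(-23, 10) == -30); // cf. row (1, 'n23', -23) in truncate_int_10 below
    return 0;
}
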
diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run19.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run19.sql
new file mode 100644
index 00000000000..8bd75b2809f
--- /dev/null
+++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run19.sql
@@ -0,0 +1,365 @@
+create database if not exists transform_partition_db;
+
+use transform_partition_db;
+
+-- set time zone for deterministic timestamp partitioning
+SET TIME ZONE '+08:00';
+
+-- =============================================
+-- Bucket partition coverage across many types
+-- =============================================
+-- Bucket by INT but empty
+CREATE TABLE bucket_int_empty (
+ id BIGINT,
+ name STRING,
+ partition_key INT
+) USING ICEBERG PARTITIONED BY (bucket (4, partition_key));
+
+-- Bucket by INT
+CREATE TABLE bucket_int_4 (
+ id BIGINT,
+ name STRING,
+ partition_key INT
+) USING ICEBERG PARTITIONED BY (bucket (4, partition_key));
+
+CREATE TABLE bucket_int_4_copy AS
+SELECT * FROM bucket_int_4;
+
+INSERT INTO
+ bucket_int_4
+VALUES (1, 'n100', -100),
+ (2, 'n1', -1),
+ (3, 'z', 0),
+ (4, 'p1', 1),
+ (5, 'p2', 2),
+ (6, 'p9', 9),
+ (7, 'p16', 16),
+ (8, 'null', NULL);
+
+-- Bucket by BIGINT
+CREATE TABLE bucket_bigint_4 (
+ id BIGINT,
+ name STRING,
+ partition_key BIGINT
+) USING ICEBERG PARTITIONED BY (bucket (4, partition_key));
+
+CREATE TABLE bucket_bigint_4_copy AS
+SELECT * FROM bucket_bigint_4;
+
+INSERT INTO
+ bucket_bigint_4
+VALUES (
+ 1,
+ 'minish',
+ -9223372036854775808
+ ),
+ (2, 'large-', -1234567890123),
+ (3, 'neg1', -1),
+ (4, 'zero', 0),
+ (5, 'pos1', 1),
+ (6, 'large+', 1234567890123),
+ (
+ 7,
+ 'maxish',
+ 9223372036854775807
+ ),
+ (8, 'null', NULL);
+
+-- Bucket by STRING
+CREATE TABLE bucket_string_4 (
+ id BIGINT,
+ name STRING,
+ partition_key STRING
+) USING ICEBERG PARTITIONED BY (bucket (4, partition_key));
+
+CREATE TABLE bucket_string_4_copy AS
+SELECT * FROM bucket_string_4;
+
+INSERT INTO
+ bucket_string_4
+VALUES (1, 'empty', ''),
+ (2, 'a', 'a'),
+ (3, 'abc', 'abc'),
+ (4, 'unicode', '北'),
+ (5, 'emoji', '😊'),
+ (6, 'space', ' '),
+ (
+ 7,
+ 'long',
+ 'this is a relatively long string for hashing'
+ ),
+ (8, 'null', NULL);
+
+-- Bucket by DATE
+CREATE TABLE bucket_date_4 (
+ id BIGINT,
+ name STRING,
+ partition_key DATE
+) USING ICEBERG PARTITIONED BY (bucket (4, partition_key));
+
+CREATE TABLE bucket_date_4_copy AS
+SELECT * FROM bucket_date_4;
+
+INSERT INTO
+ bucket_date_4
+VALUES (1, 'd1', DATE '1970-01-01'),
+ (2, 'd2', DATE '1999-12-31'),
+ (3, 'd3', DATE '2000-01-01'),
+ (4, 'd4', DATE '2024-02-29'),
+ (5, 'd5', DATE '2024-03-01'),
+ (6, 'd6', DATE '2038-01-19'),
+ (7, 'null', NULL);
+
+-- Bucket by TIMESTAMP (with time zone semantics)
+CREATE TABLE bucket_timestamp_4 (
+ id BIGINT,
+ name STRING,
+ partition_key TIMESTAMP
+) USING ICEBERG PARTITIONED BY (bucket (4, partition_key));
+
+CREATE TABLE bucket_timestamp_4_copy AS
+SELECT * FROM bucket_timestamp_4;
+
+INSERT INTO
+ bucket_timestamp_4
+VALUES (
+ 1,
+ 't1',
+ TIMESTAMP '2024-01-15 08:00:00'
+ ),
+ (
+ 2,
+ 't2',
+ TIMESTAMP '2024-01-15 09:00:00'
+ ),
+ (
+ 3,
+ 't3',
+ TIMESTAMP '2024-06-30 23:59:59'
+ ),
+ (
+ 4,
+ 't4',
+ TIMESTAMP '1970-01-01 00:00:00'
+ ),
+ (
+ 5,
+ 't5',
+ TIMESTAMP '2030-12-31 23:59:59'
+ ),
+ (6, 'null', NULL);
+
+-- Bucket by TIMESTAMP_NTZ
+CREATE TABLE bucket_timestamp_ntz_4 (
+ id BIGINT,
+ name STRING,
+ partition_key TIMESTAMP_NTZ
+) USING ICEBERG PARTITIONED BY (bucket (4, partition_key));
+
+CREATE TABLE bucket_timestamp_ntz_4_copy AS
+SELECT * FROM bucket_timestamp_ntz_4;
+
+INSERT INTO
+ bucket_timestamp_ntz_4
+VALUES (
+ 1,
+ 'ntz1',
+ TIMESTAMP_NTZ '2024-01-15 08:00:00'
+ ),
+ (
+ 2,
+ 'ntz2',
+ TIMESTAMP_NTZ '2024-01-15 09:00:00'
+ ),
+ (
+ 3,
+ 'ntz3',
+ TIMESTAMP_NTZ '2024-06-30 23:59:59'
+ ),
+ (
+ 4,
+ 'ntz4',
+ TIMESTAMP_NTZ '1970-01-01 00:00:00'
+ ),
+ (
+ 5,
+ 'ntz5',
+ TIMESTAMP_NTZ '2030-12-31 23:59:59'
+ ),
+ (6, 'null', NULL);
+
+-- Bucket by DECIMAL
+CREATE TABLE bucket_decimal_4 (
+ id BIGINT,
+ name STRING,
+ partition_key DECIMAL(10, 2)
+) USING ICEBERG PARTITIONED BY (bucket (4, partition_key));
+
+CREATE TABLE bucket_decimal_4_copy AS
+SELECT * FROM bucket_decimal_4;
+
+INSERT INTO
+ bucket_decimal_4
+VALUES (1, 'p1', 0.00),
+ (2, 'p2', 1.00),
+ (3, 'p3', 10.50),
+ (4, 'n1', -1.25),
+ (5, 'n2', -10.50),
+ (6, 'big', 9999999.99),
+ (7, 'null', NULL);
+
+-- Bucket by BINARY
+CREATE TABLE bucket_binary_4 (
+ id BIGINT,
+ name STRING,
+ partition_key BINARY
+) USING ICEBERG PARTITIONED BY (bucket (4, partition_key));
+
+CREATE TABLE bucket_binary_4_copy AS
+SELECT * FROM bucket_binary_4;
+
+INSERT INTO
+ bucket_binary_4
+VALUES (1, 'b1', CAST('' AS BINARY)),
+ (2, 'b2', CAST('a' AS BINARY)),
+ (
+ 3,
+ 'b3',
+ CAST('abc' AS BINARY)
+ ),
+ (4, 'b4', CAST('你好' AS BINARY)),
+ (
+ 5,
+ 'b5',
+ CAST('01010101' AS BINARY)
+ ),
+ (6, 'null', NULL);
+
+-- =============================================
+-- Truncate partition coverage for supported types
+-- =============================================
+
+-- Truncate STRING to length 3
+CREATE TABLE truncate_string_3 (
+ id BIGINT,
+ name STRING,
+ partition_key STRING
+) USING ICEBERG PARTITIONED BY (
+ truncate (3, partition_key)
+);
+
+CREATE TABLE truncate_string_3_copy AS
+SELECT * FROM truncate_string_3;
+
+INSERT INTO
+ truncate_string_3
+VALUES (1, 'empty', ''),
+ (2, 'short', 'a'),
+ (3, 'two', 'ab'),
+ (4, 'three', 'abc'),
+ (5, 'long', 'abcdef'),
+ (6, 'unicode', '你好世界'),
+ (7, 'space', ' ab'),
+ (8, 'null', NULL);
+
+-- Truncate BINARY to length 4 bytes
+CREATE TABLE truncate_binary_4 (
+ id BIGINT,
+ name STRING,
+ partition_key BINARY
+) USING ICEBERG PARTITIONED BY (
+ truncate (4, partition_key)
+);
+
+CREATE TABLE truncate_binary_4_copy AS
+SELECT * FROM truncate_binary_4;
+
+INSERT INTO
+ truncate_binary_4
+VALUES (1, 'b0', CAST('' AS BINARY)),
+ (2, 'b1', CAST('a' AS BINARY)),
+ (
+ 3,
+ 'b2',
+ CAST('abcd' AS BINARY)
+ ),
+ (
+ 4,
+ 'b3',
+ CAST('abcdef' AS BINARY)
+ ),
+ (5, 'b4', CAST('你好' AS BINARY)),
+ (6, 'null', NULL);
+
+-- Truncate INT by width 10
+CREATE TABLE truncate_int_10 (
+ id BIGINT,
+ name STRING,
+ partition_key INT
+) USING ICEBERG PARTITIONED BY (
+ truncate (10, partition_key)
+);
+
+CREATE TABLE truncate_int_10_copy AS
+SELECT * FROM truncate_int_10;
+
+INSERT INTO
+ truncate_int_10
+VALUES (1, 'n23', -23),
+ (2, 'n1', -1),
+ (3, 'z', 0),
+ (4, 'p7', 7),
+ (5, 'p10', 10),
+ (6, 'p19', 19),
+ (7, 'p20', 20),
+ (8, 'p999', 999),
+ (9, 'null', NULL);
+
+-- Truncate BIGINT by width 100
+CREATE TABLE truncate_bigint_100 (
+ id BIGINT,
+ name STRING,
+ partition_key BIGINT
+) USING ICEBERG PARTITIONED BY (
+ truncate (100, partition_key)
+);
+
+CREATE TABLE truncate_bigint_100_copy AS
+SELECT * FROM truncate_bigint_100;
+
+INSERT INTO
+ truncate_bigint_100
+VALUES (1, 'n1001', -1001),
+ (2, 'n1', -1),
+ (3, 'z', 0),
+ (4, 'p7', 7),
+ (5, 'p100', 100),
+ (6, 'p199', 199),
+ (7, 'p200', 200),
+ (8, 'p10101', 10101),
+ (9, 'null', NULL);
+
+-- Truncate DECIMAL(10,2) by width 10
+CREATE TABLE truncate_decimal_10 (
+ id BIGINT,
+ name STRING,
+ partition_key DECIMAL(10, 2)
+) USING ICEBERG PARTITIONED BY (
+ truncate (10, partition_key)
+);
+
+CREATE TABLE truncate_decimal_10_copy AS
+SELECT * FROM truncate_decimal_10;
+
+INSERT INTO
+ truncate_decimal_10
+VALUES (1, 'z', 0.00),
+ (2, 'p1', 1.23),
+ (3, 'p9', 9.99),
+ (4, 'p10', 10.00),
+ (5, 'p19', 19.99),
+ (6, 'p20', 20.00),
+ (7, 'n1', -1.23),
+ (8, 'n20', -20.00),
+ (9, 'big', 9999999.99),
+ (10, 'null', NULL);
\ No newline at end of file
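
For reference, the bucket transform these tables exercise is defined by the Iceberg spec as bucket(N, v) = (murmur3_x86_32(encode(v)) & Integer.MAX_VALUE) % N, where int and bigint values are encoded as 8-byte little-endian longs so every engine derives identical bucket ids. A self-contained C++ sketch under those assumptions (an illustration, not the engine code; it assumes a little-endian host and seed 0):

#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t rotl32(uint32_t x, int r) { return (x << r) | (x >> (32 - r)); }

// Murmur3 x86 32-bit with seed 0, the hash mandated by the Iceberg spec.
uint32_t murmur3_x86_32(const uint8_t* data, size_t len) {
    uint32_t h = 0;
    const uint32_t c1 = 0xcc9e2d51, c2 = 0x1b873593;
    const size_t nblocks = len / 4;
    for (size_t i = 0; i < nblocks; ++i) {
        uint32_t k;
        std::memcpy(&k, data + i * 4, 4);
        k *= c1; k = rotl32(k, 15); k *= c2;
        h ^= k; h = rotl32(h, 13); h = h * 5 + 0xe6546b64;
    }
    uint32_t k = 0;
    const uint8_t* tail = data + nblocks * 4;
    switch (len & 3) {
    case 3: k ^= uint32_t(tail[2]) << 16; [[fallthrough]];
    case 2: k ^= uint32_t(tail[1]) << 8; [[fallthrough]];
    case 1: k ^= uint32_t(tail[0]);
        k *= c1; k = rotl32(k, 15); k *= c2; h ^= k;
    }
    h ^= uint32_t(len);
    h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; h *= 0xc2b2ae35; h ^= h >> 16;
    return h;
}

// bucket(n, v) for the integer family: hash the 8-byte little-endian
// two's-complement encoding, clear the sign bit, then take modulo n.
int32_t bucket_long(int64_t v, int32_t n) {
    uint8_t buf[8];
    std::memcpy(buf, &v, 8);
    int32_t hash = static_cast<int32_t>(murmur3_x86_32(buf, 8));
    return (hash & INT32_MAX) % n;
}

int main() {
    int64_t v = 34;
    uint8_t buf[8];
    std::memcpy(buf, &v, 8);
    // The spec's published example: the hash of int/long 34 is 2017239379.
    assert(murmur3_x86_32(buf, 8) == 2017239379u);
    assert(bucket_long(34, 4) == 2017239379 % 4);
    return 0;
}
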
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
index 729d2006d29..b2c67eca68a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
@@ -585,6 +585,9 @@ public class IcebergScanNode extends FileQueryScanNode {
     }

     private void assignCountToSplits(List<Split> splits, long totalCount) {
+        if (splits.isEmpty()) {
+            return;
+        }
         int size = splits.size();
         long countPerSplit = totalCount / size;
         for (int i = 0; i < size - 1; i++) {
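
For context on the Java fix above: assignCountToSplits divides the pushed-down row count by splits.size(), and an empty split list (for example, when partition pruning removes every file) made totalCount / size throw ArithmeticException. A minimal C++ sketch of the distribution logic with the guard (names and the tail remainder handling are assumptions based on the visible context, not the exact Doris code):

#include <cassert>
#include <cstdint>
#include <vector>

// Spread total_count evenly over the splits; the last split takes the remainder.
void assign_count_to_splits(std::vector<int64_t>& split_counts, int64_t total_count) {
    if (split_counts.empty()) {
        return; // the fix: otherwise count_per_split below divides by zero
    }
    const int64_t size = static_cast<int64_t>(split_counts.size());
    const int64_t count_per_split = total_count / size;
    for (int64_t i = 0; i < size - 1; ++i) {
        split_counts[i] = count_per_split;
    }
    // The last split absorbs the remainder so the counts still sum to total_count.
    split_counts[size - 1] = total_count - count_per_split * (size - 1);
}

int main() {
    std::vector<int64_t> counts(3);
    assign_count_to_splits(counts, 10);
    assert(counts[0] == 3 && counts[1] == 3 && counts[2] == 4);

    std::vector<int64_t> none;
    assign_count_to_splits(none, 10); // previously a division by zero
    return 0;
}
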
diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.out
new file mode 100644
index 00000000000..1c26361afa5
Binary files /dev/null and b/regression-test/data/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.out differ
diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_transform_partitions.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_transform_partitions.out
new file mode 100644
index 00000000000..b62eb888266
Binary files /dev/null and b/regression-test/data/external_table_p0/iceberg/test_iceberg_transform_partitions.out differ
diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.groovy
new file mode 100644
index 00000000000..63a723ebe9e
--- /dev/null
+++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_runtime_filter_partition_pruning_transform.groovy
@@ -0,0 +1,215 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_iceberg_runtime_filter_partition_pruning_transform",
"p0,external,doris,external_docker,external_docker_doris") {
+
+ String enabled = context.config.otherConfigs.get("enableIcebergTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("disable iceberg test.")
+ return
+ }
+
+ String catalog_name = "test_iceberg_runtime_filter_partition_pruning_transform"
+ String db_name = "transform_partition_db"
+ String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port")
+ String minio_port = context.config.otherConfigs.get("iceberg_minio_port")
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """
+ CREATE CATALOG ${catalog_name} PROPERTIES (
+ 'type'='iceberg',
+ 'iceberg.catalog.type'='rest',
+ 'uri' = 'http://${externalEnvIp}:${rest_port}',
+ "s3.access_key" = "admin",
+ "s3.secret_key" = "password",
+ "s3.endpoint" = "http://${externalEnvIp}:${minio_port}",
+ "s3.region" = "us-east-1"
+ );"""
+
+ sql """switch ${catalog_name}"""
+ sql """use ${db_name}"""
+
+ def test_runtime_filter_partition_pruning_transform = {
+ // Bucket partitions
+ qt_bucket_int_eq """
+ select count(*) from bucket_int_4 where partition_key =
+ (select partition_key from bucket_int_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 1);
+ """
+ qt_bucket_int_in """
+ select count(*) from bucket_int_4 where partition_key in
+ (select partition_key from bucket_int_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 2);
+ """
+
+ qt_bucket_bigint_eq """
+ select count(*) from bucket_bigint_4 where partition_key =
+ (select partition_key from bucket_bigint_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 1);
+ """
+ qt_bucket_bigint_in """
+ select count(*) from bucket_bigint_4 where partition_key in
+ (select partition_key from bucket_bigint_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 2);
+ """
+
+ qt_bucket_string_eq """
+ select count(*) from bucket_string_4 where partition_key =
+ (select partition_key from bucket_string_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 1);
+ """
+ qt_bucket_string_in """
+ select count(*) from bucket_string_4 where partition_key in
+ (select partition_key from bucket_string_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 2);
+ """
+
+ qt_bucket_date_eq """
+ select count(*) from bucket_date_4 where partition_key =
+ (select partition_key from bucket_date_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 1);
+ """
+ qt_bucket_date_in """
+ select count(*) from bucket_date_4 where partition_key in
+ (select partition_key from bucket_date_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 2);
+ """
+
+ qt_bucket_ts_eq """
+ select count(*) from bucket_timestamp_4 where partition_key =
+ (select partition_key from bucket_timestamp_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 1);
+ """
+ qt_bucket_ts_in """
+ select count(*) from bucket_timestamp_4 where partition_key in
+ (select partition_key from bucket_timestamp_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 2);
+ """
+
+ qt_bucket_tntz_eq """
+ select count(*) from bucket_timestamp_ntz_4 where partition_key =
+ (select partition_key from bucket_timestamp_ntz_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 1);
+ """
+ qt_bucket_tntz_in """
+ select count(*) from bucket_timestamp_ntz_4 where partition_key in
+ (select partition_key from bucket_timestamp_ntz_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 2);
+ """
+
+ qt_bucket_binary_eq """
+ select count(*) from bucket_binary_4 where partition_key =
+ (select partition_key from bucket_binary_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 1);
+ """
+ qt_bucket_binary_in """
+ select count(*) from bucket_binary_4 where partition_key in
+ (select partition_key from bucket_binary_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 2);
+ """
+
+ // Truncate partitions
+ qt_trunc_string_eq """
+ select count(*) from truncate_string_3 where partition_key =
+ (select partition_key from truncate_string_3
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 1);
+ """
+ qt_trunc_string_in """
+ select count(*) from truncate_string_3 where partition_key in
+ (select partition_key from truncate_string_3
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 2);
+ """
+
+ qt_trunc_binary_eq """
+ select count(*) from truncate_binary_4 where partition_key =
+ (select partition_key from truncate_binary_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 1);
+ """
+ qt_trunc_binary_in """
+ select count(*) from truncate_binary_4 where partition_key in
+ (select partition_key from truncate_binary_4
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 2);
+ """
+
+ qt_trunc_int_eq """
+ select count(*) from truncate_int_10 where partition_key =
+ (select partition_key from truncate_int_10
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 1);
+ """
+ qt_trunc_int_in """
+ select count(*) from truncate_int_10 where partition_key in
+ (select partition_key from truncate_int_10
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 2);
+ """
+
+ qt_trunc_bigint_eq """
+ select count(*) from truncate_bigint_100 where partition_key =
+ (select partition_key from truncate_bigint_100
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 1);
+ """
+ qt_trunc_bigint_in """
+ select count(*) from truncate_bigint_100 where partition_key in
+ (select partition_key from truncate_bigint_100
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 2);
+ """
+
+ qt_trunc_decimal_eq """
+ select count(*) from truncate_decimal_10 where partition_key =
+ (select partition_key from truncate_decimal_10
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 1);
+ """
+ qt_trunc_decimal_in """
+ select count(*) from truncate_decimal_10 where partition_key in
+ (select partition_key from truncate_decimal_10
+ group by partition_key having count(*) > 0
+ order by partition_key desc limit 2);
+ """
+ }
+ try {
+ sql """ set time_zone = 'Asia/Shanghai'; """
+ sql """ set enable_runtime_filter_partition_prune = false; """
+ test_runtime_filter_partition_pruning_transform()
+ sql """ set enable_runtime_filter_partition_prune = true; """
+ test_runtime_filter_partition_pruning_transform()
+ } finally {
+ sql """ unset variable time_zone; """
+ sql """ set enable_runtime_filter_partition_prune = true; """
+ }
+}
diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_transform_partitions.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_transform_partitions.groovy
new file mode 100644
index 00000000000..f6c6d778f46
--- /dev/null
+++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_transform_partitions.groovy
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_iceberg_transform_partitions",
"p0,external,doris,external_docker,external_docker_doris") {
+
+ String enabled = context.config.otherConfigs.get("enableIcebergTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("disable iceberg test.")
+ return
+ }
+
+ String catalog_name = "test_iceberg_transform_partitions"
+ String db_name = "transform_partition_db"
+ String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port")
+ String minio_port = context.config.otherConfigs.get("iceberg_minio_port")
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """
+ CREATE CATALOG ${catalog_name} PROPERTIES (
+ 'type'='iceberg',
+ 'iceberg.catalog.type'='rest',
+ 'uri' = 'http://${externalEnvIp}:${rest_port}',
+ "s3.access_key" = "admin",
+ "s3.secret_key" = "password",
+ "s3.endpoint" = "http://${externalEnvIp}:${minio_port}",
+ "s3.region" = "us-east-1"
+ );"""
+
+ sql """switch ${catalog_name}"""
+ sql """use ${db_name}"""
+
+ def test_iceberg_transform_partitions = {
+ // Bucket by INT (empty table)
+ qt_bucket_int_empty_cnt1 """
+ select count(*) from bucket_int_empty;
+ """
+ qt_bucket_int_empty_select1 """
+ select * from bucket_int_empty order by id;
+ """
+
+ // Bucket by INT
+ qt_bucket_int_4_select1 """
+ select * from bucket_int_4 where partition_key = 1 order by id;
+ """
+ qt_bucket_int_4_select2 """
+ select * from bucket_int_4 where partition_key in (2, 16) order by id;
+ """
+ qt_bucket_int_4_cnt1 """
+ select count(*) from bucket_int_4 where partition_key = -100;
+ """
+
+ // Bucket by BIGINT
+ qt_bucket_bigint_4_cnt1 """
+ select count(*) from bucket_bigint_4 where partition_key = 1;
+ """
+ qt_bucket_bigint_4_cnt2 """
+ select count(*) from bucket_bigint_4 where partition_key in (-1, 1234567890123);
+ """
+ qt_bucket_bigint_4_select1 """
+ select * from bucket_bigint_4 where partition_key = 1 order by id;
+ """
+
+ // Bucket by STRING
+ qt_bucket_string_4_cnt1 """
+ select count(*) from bucket_string_4 where partition_key = 'abc';
+ """
+ qt_bucket_string_4_cnt2 """
+ select count(*) from bucket_string_4 where partition_key in ('', '😊');
+ """
+ qt_bucket_string_4_select1 """
+ select * from bucket_string_4 where partition_key = 'abc' order by id;
+ """
+
+ // Bucket by DATE
+ qt_bucket_date_4_cnt1 """
+ select count(*) from bucket_date_4 where partition_key = DATE '2024-02-29';
+ """
+ qt_bucket_date_4_cnt2 """
+ select count(*) from bucket_date_4 where partition_key in (DATE '1970-01-01', DATE '1999-12-31');
+ """
+ qt_bucket_date_4_select1 """
+ select * from bucket_date_4 where partition_key = DATE '2024-02-29' order by id;
+ """
+
+ // Bucket by TIMESTAMP
+ qt_bucket_timestamp_4_cnt1 """
+ select count(*) from bucket_timestamp_4 where partition_key = TIMESTAMP '2024-01-15 08:00:00';
+ """
+ qt_bucket_timestamp_4_cnt2 """
+ select count(*) from bucket_timestamp_4 where partition_key in (TIMESTAMP '2024-06-30 23:59:59', TIMESTAMP '2030-12-31 23:59:59');
+ """
+ qt_bucket_timestamp_4_select1 """
+ select * from bucket_timestamp_4 where partition_key = TIMESTAMP '2024-01-15 08:00:00' order by id;
+ """
+
+ // Bucket by TIMESTAMP_NTZ
+ qt_bucket_timestamp_ntz_4_cnt1 """
+ select count(*) from bucket_timestamp_ntz_4 where partition_key = '2024-01-15 08:00:00';
+ """
+ qt_bucket_timestamp_ntz_4_cnt2 """
+ select count(*) from bucket_timestamp_ntz_4 where partition_key in ('2024-06-30 23:59:59', '2030-12-31 23:59:59');
+ """
+ qt_bucket_timestamp_ntz_4_select1 """
+ select * from bucket_timestamp_ntz_4 where partition_key = '2024-01-15 08:00:00' order by id;
+ """
+
+ // Bucket by DECIMAL
+ qt_bucket_decimal_4_cnt1 """
+ select count(*) from bucket_decimal_4 where partition_key = 10.50;
+ """
+ qt_bucket_decimal_4_cnt2 """
+ select count(*) from bucket_decimal_4 where partition_key in (-1.25, 9999999.99);
+ """
+ qt_bucket_decimal_4_select1 """
+ select * from bucket_decimal_4 where partition_key = 10.50 order by id;
+ """
+
+ // Bucket by BINARY
+ qt_bucket_binary_4_cnt1 """
+ select count(*) from bucket_binary_4 where partition_key = 'abc';
+ """
+ qt_bucket_binary_4_cnt2 """
+ select count(*) from bucket_binary_4 where partition_key in ('', '你好');
+ """
+ qt_bucket_binary_4_select1 """
+ select * from bucket_binary_4 where partition_key = 'abc' order by id;
+ """
+
+ // Truncate STRING(3)
+ qt_truncate_string_3_cnt1 """
+ select count(*) from truncate_string_3 where partition_key = 'abcdef';
+ """
+ qt_truncate_string_3_cnt2 """
+ select count(*) from truncate_string_3 where partition_key in ('abc', 'abcdef');
+ """
+ qt_truncate_string_3_select1 """
+ select * from truncate_string_3 where partition_key = 'abcdef' order by id;
+ """
+
+ // Truncate BINARY(4)
+ qt_truncate_binary_4_cnt1 """
+ select count(*) from truncate_binary_4 where partition_key = 'abcdef';
+ """
+ qt_truncate_binary_4_cnt2 """
+ select count(*) from truncate_binary_4 where partition_key in ('abcd', 'abcdef');
+ """
+ qt_truncate_binary_4_select1 """
+ select * from truncate_binary_4 where partition_key = 'abcdef' order by id;
+ """
+
+ // Truncate INT(10)
+ qt_truncate_int_10_cnt1 """
+ select count(*) from truncate_int_10 where partition_key = 19;
+ """
+ qt_truncate_int_10_cnt2 """
+ select count(*) from truncate_int_10 where partition_key in (7, 19);
+ """
+ qt_truncate_int_10_select1 """
+ select * from truncate_int_10 where partition_key = 19 order by id;
+ """
+
+ // Truncate BIGINT(100)
+ qt_truncate_bigint_100_cnt1 """
+ select count(*) from truncate_bigint_100 where partition_key = 199;
+ """
+ qt_truncate_bigint_100_cnt2 """
+ select count(*) from truncate_bigint_100 where partition_key in (7, 199);
+ """
+ qt_truncate_bigint_100_select1 """
+ select * from truncate_bigint_100 where partition_key = 199 order by id;
+ """
+
+ // Truncate DECIMAL(10,2) width 10
+ qt_truncate_decimal_10_cnt1 """
+ select count(*) from truncate_decimal_10 where partition_key = 19.99;
+ """
+ qt_truncate_decimal_10_cnt2 """
+ select count(*) from truncate_decimal_10 where partition_key in (9.99, 19.99);
+ """
+ qt_truncate_decimal_10_select1 """
+ select * from truncate_decimal_10 where partition_key = 19.99 order by id;
+ """
+ }
+
+ try {
+ sql """ set time_zone = 'Asia/Shanghai'; """
+ test_iceberg_transform_partitions()
+ } finally {
+ sql """ unset variable time_zone; """
+ }
+}
diff --git a/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_transform_partitions.groovy b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_transform_partitions.groovy
new file mode 100644
index 00000000000..2662de4dfda
--- /dev/null
+++ b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_write_transform_partitions.groovy
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_iceberg_write_transform_partitions",
"p0,external,iceberg,external_docker,external_docker_iceberg") {
+ String enabled = context.config.otherConfigs.get("enableIcebergTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("disable iceberg test.")
+ return
+ }
+
+ String catalog_name = "test_iceberg_write_transform_partitions"
+ String db_name = "transform_partition_db"
+ String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port")
+ String minio_port = context.config.otherConfigs.get("iceberg_minio_port")
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+ sql """drop catalog if exists ${catalog_name}"""
+ sql """
+ CREATE CATALOG ${catalog_name} PROPERTIES (
+ 'type'='iceberg',
+ 'iceberg.catalog.type'='rest',
+ 'uri' = 'http://${externalEnvIp}:${rest_port}',
+ "s3.access_key" = "admin",
+ "s3.secret_key" = "password",
+ "s3.endpoint" = "http://${externalEnvIp}:${minio_port}",
+ "s3.region" = "us-east-1"
+ );"""
+
+ sql """switch ${catalog_name}"""
+ sql """use ${db_name}"""
+
+ // Since we currently cannot reset the table data, we only verify that the INSERT statements execute successfully
+ def test_write_transform_partitions = { String table ->
+ sql """ insert into ${table}_copy select * from ${table}; """
+ sql """ insert into ${table}_copy select * from ${table}_copy limit
50; """
+ sql """ insert into ${table}_copy select * from ${table}_copy limit
50; """
+ }
+
+ try {
+ sql """ set time_zone = 'Asia/Shanghai'; """
+ test_write_transform_partitions("bucket_int_4");
+ test_write_transform_partitions("bucket_bigint_4");
+ test_write_transform_partitions("bucket_string_4");
+ test_write_transform_partitions("bucket_date_4");
+ test_write_transform_partitions("bucket_timestamp_4");
+ test_write_transform_partitions("bucket_timestamp_ntz_4");
+ test_write_transform_partitions("bucket_decimal_4");
+ test_write_transform_partitions("bucket_binary_4");
+ test_write_transform_partitions("truncate_string_3");
+ test_write_transform_partitions("truncate_binary_4");
+ test_write_transform_partitions("truncate_int_10");
+ test_write_transform_partitions("truncate_bigint_100");
+ test_write_transform_partitions("truncate_decimal_10");
+ } finally {
+ sql """ unset variable time_zone; """
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]