This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6622f506bac [fix](paimon)Handle oversized CHAR/VARCHAR fields in
Paimon to Doris type mapping (#55051)
6622f506bac is described below
commit 6622f506bacec14ee1b9fed537c458914230af46
Author: Petrichor <[email protected]>
AuthorDate: Fri Aug 29 05:17:38 2025 +0800
[fix](paimon)Handle oversized CHAR/VARCHAR fields in Paimon to Doris type
mapping (#55051)
### What problem does this PR solve?
In PR https://github.com/apache/doris/pull/49623, we implemented
conversion from Paimon `VARCHAR/CHAR` types to Doris `VARCHAR/CHAR`
types. However, there are significant differences in the maximum length
constraints between these systems:
**Apache Paimon:**
- `CHAR`: Fixed-length character string declared using CHAR(n) where n
is the number of code points. n must have a value between `1` and
`2,147,483,647` (inclusive). Defaults to n=1 if no length is specified.
- `VARCHAR`: Variable-length character string declared using VARCHAR(n)
where n is the maximum number of code points. n must have a value
between `1` and `2,147,483,647` (inclusive). Defaults to n=1 if no
length is specified.
**Apache Doris:**
- `CHAR`: Maximum length is `255` bytes
- `VARCHAR`: Maximum length is `65,533` bytes
**Solution:**
This PR addresses the length constraint mismatch by automatically
converting oversized Paimon VARCHAR/CHAR types to Doris STRING type when
they exceed Doris limits:
- Paimon `VARCHAR` with length > 65,533 → Doris `STRING`
- Paimon `CHAR` with length > 255 → Doris `STRING`
This ensures compatibility while preserving data integrity during type
mapping from Paimon to Doris.
---
.../create_preinstalled_scripts/paimon/run03.sql | 11 +++++++----
.../apache/doris/datasource/paimon/PaimonUtil.java | 12 ++++++++++--
.../external_table_p0/paimon/paimon_system_table.out | Bin 5130 -> 4905 bytes
.../paimon/test_paimon_char_varchar_type.out | Bin 282 -> 318 bytes
.../paimon/test_paimon_schema_change.out | Bin 4751 -> 4601 bytes
5 files changed, 17 insertions(+), 6 deletions(-)
diff --git
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run03.sql
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run03.sql
index bd8d673636c..bee50ea7e0b 100644
---
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run03.sql
+++
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run03.sql
@@ -8,8 +8,11 @@ drop table if exists test_varchar_char_type;
create table test_varchar_char_type (
c1 int,
c2 char(1),
- c3 char(2147483647),
- c4 varchar(1),
- c6 varchar(2147483646),
- c5 varchar(2147483647)
+ c3 char(254),
+ c4 char(2147483647),
+ c5 varchar(1),
+ c6 varchar(65533),
+ c7 varchar(2147483646),
+ c8 varchar(2147483647),
+ c9 string
);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java
index 87792393294..f319d9cad28 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonUtil.java
@@ -225,9 +225,17 @@ public class PaimonUtil {
case TINYINT:
return Type.TINYINT;
case VARCHAR:
- return ScalarType.createVarcharType(((VarCharType)
dataType).getLength());
+ int varcharLen = ((VarCharType) dataType).getLength();
+ if (varcharLen > 65533) {
+ return ScalarType.createStringType();
+ }
+ return ScalarType.createVarcharType(varcharLen);
case CHAR:
- return ScalarType.createCharType(((CharType)
dataType).getLength());
+ int charLen = ((CharType) dataType).getLength();
+ if (charLen > 255) {
+ return ScalarType.createStringType();
+ }
+ return ScalarType.createCharType(charLen);
case BINARY:
case VARBINARY:
return Type.STRING;
diff --git
a/regression-test/data/external_table_p0/paimon/paimon_system_table.out
b/regression-test/data/external_table_p0/paimon/paimon_system_table.out
index 3bfc38f8bd1..c9877a46812 100644
Binary files
a/regression-test/data/external_table_p0/paimon/paimon_system_table.out and
b/regression-test/data/external_table_p0/paimon/paimon_system_table.out differ
diff --git
a/regression-test/data/external_table_p0/paimon/test_paimon_char_varchar_type.out
b/regression-test/data/external_table_p0/paimon/test_paimon_char_varchar_type.out
index 80fca7ed402..e7dcf54571a 100644
Binary files
a/regression-test/data/external_table_p0/paimon/test_paimon_char_varchar_type.out
and
b/regression-test/data/external_table_p0/paimon/test_paimon_char_varchar_type.out
differ
diff --git
a/regression-test/data/external_table_p0/paimon/test_paimon_schema_change.out
b/regression-test/data/external_table_p0/paimon/test_paimon_schema_change.out
index fed740a8803..5d33ed9f7e6 100644
Binary files
a/regression-test/data/external_table_p0/paimon/test_paimon_schema_change.out
and
b/regression-test/data/external_table_p0/paimon/test_paimon_schema_change.out
differ
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]