This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new e2e806a5e7 [improve](clickhouse jdbc) support clickhouse array type (#17993) e2e806a5e7 is described below commit e2e806a5e7fe6304597ed82a97584cf9215066df Author: yongkang.zhong <zhong...@qq.com> AuthorDate: Wed Mar 22 19:42:32 2023 +0800 [improve](clickhouse jdbc) support clickhouse array type (#17993) In this PR, I match the array type of ClickHouse to the array type of Doris's jdbc external. --- be/src/vec/exec/vjdbc_connector.cpp | 3 +- docs/en/docs/lakehouse/external-table/jdbc.md | 46 ++++++++++++---------- docs/en/docs/lakehouse/multi-catalog/jdbc.md | 37 ++++++++--------- docs/zh-CN/docs/lakehouse/external-table/jdbc.md | 44 +++++++++++---------- docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md | 37 ++++++++--------- .../org/apache/doris/external/jdbc/JdbcClient.java | 6 +++ .../java/org/apache/doris/udf/JdbcExecutor.java | 38 +++++++++++++++++- 7 files changed, 131 insertions(+), 80 deletions(-) diff --git a/be/src/vec/exec/vjdbc_connector.cpp b/be/src/vec/exec/vjdbc_connector.cpp index e9caf93647..226904588d 100644 --- a/be/src/vec/exec/vjdbc_connector.cpp +++ b/be/src/vec/exec/vjdbc_connector.cpp @@ -313,7 +313,8 @@ Status JdbcConnector::_check_type(SlotDescriptor* slot_desc, const std::string& break; } case TYPE_ARRAY: { - if (type_str != "java.sql.Array" && type_str != "java.lang.String") { + if (type_str != "java.sql.Array" && type_str != "java.lang.String" && + type_str != "java.lang.Object") { return Status::InternalError(error_msg); } if (!slot_desc->type().children[0].children.empty()) { diff --git a/docs/en/docs/lakehouse/external-table/jdbc.md b/docs/en/docs/lakehouse/external-table/jdbc.md index 5cfb8c1b9b..9d5d478e37 100644 --- a/docs/en/docs/lakehouse/external-table/jdbc.md +++ b/docs/en/docs/lakehouse/external-table/jdbc.md @@ -177,7 +177,8 @@ Test information on more versions will be provided in the future. | ClickHouse Version | ClickHouse JDBC Driver Version | | ------------------ | ------------------------------------- | -| 22 | clickhouse-jdbc-0.3.2-patch11-all.jar | +| 22 | clickhouse-jdbc-0.3.2-patch11-all.jar | +| 22 | clickhouse-jdbc-0.4.1-all.jar | #### 6.Sap_HanaTest @@ -275,29 +276,32 @@ The followings list how data types in different databases are mapped in Doris. ### ClickHouse -| ClickHouse | Doris | -| :--------: | :------: | -| BOOLEAN | BOOLEAN | -| CHAR | CHAR | -| VARCHAR | VARCHAR | -| STRING | STRING | -| DATE | DATE | -| Float32 | FLOAT | -| Float64 | DOUBLE | -| Int8 | TINYINT | -| Int16 | SMALLINT | -| Int32 | INT | -| Int64 | BIGINT | -| Int128 | LARGEINT | -| DATETIME | DATETIME | -| DECIMAL | DECIMAL | - -Note: - +| ClickHouse | Doris | +|:-------------------------------------------------------:|:------------------------:| +| Boolean | BOOLEAN | +| String | STRING | +| Date/Date32 | DATE/DATEV2 | +| DateTime/DateTime64 | DATETIME/DATETIMEV2 | +| Float32 | FLOAT | +| Float64 | DOUBLE | +| Int8 | TINYINT | +| Int16/UInt8 | SMALLINT | +| Int32/UInt16 | INT | +| Int64/Uint32 | BIGINT | +| Int128/UInt64 | LARGEINT | +| Int256/UInt128/UInt256 | STRING | +| Decimal | DECIMAL/DECIMALV3/STRING | +| Enum/IPv4/IPv6/UUID | STRING | +| <version since="dev" type="inline"> Array(T) </version> | ARRAY\<T\> | + + +**Note:** + +- <version since="dev" type="inline"> For Array types in ClickHouse, use Doris's Array type to match them. For basic types in an Array, see Basic type matching rules. Nested arrays are not supported. </version> - Some data types in ClickHouse, such as UUID, IPv4, IPv6, and Enum8, will be mapped to Varchar/String in Doris. IPv4 and IPv6 will be displayed with an `/` as a prefix. You can use the `split_part` function to remove the `/` . - The Point Geo type in ClickHouse cannot be mapped in Doris by far. -### SAP_HANA +### SAP HANA | SAP_HANA | Doris | |:------------:|:-------------------:| diff --git a/docs/en/docs/lakehouse/multi-catalog/jdbc.md b/docs/en/docs/lakehouse/multi-catalog/jdbc.md index c96db6df6e..758986dcac 100644 --- a/docs/en/docs/lakehouse/multi-catalog/jdbc.md +++ b/docs/en/docs/lakehouse/multi-catalog/jdbc.md @@ -329,23 +329,24 @@ The transaction mechanism ensures the atomicity of data writing to JDBC External ### Clickhouse -| ClickHouse Type | Doris Type | Comment | -| ---------------------- | ----------- | ------------------------------------------------------------ | -| Bool | BOOLEAN | | -| String | STRING | | -| Date/Date32 | DATE | | -| DateTime/DateTime64 | DATETIME | Data beyond the maximum precision of DateTime in Doris will be truncated. | -| Float32 | FLOAT | | -| Float64 | DOUBLE | | -| Int8 | TINYINT | | -| Int16/UInt8 | SMALLINT | Doris does not support UNSIGNED data types so UInt8 will be mapped to SMALLINT. | -| Int32/UInt16 | INT | Doris does not support UNSIGNED data types so UInt16 will be mapped to INT. | -| Int64/Uint32 | BIGINT | Doris does not support UNSIGNED data types so UInt32 will be mapped to BIGINT. | -| Int128/UInt64 | LARGEINT | Doris does not support UNSIGNED data types so UInt64 will be mapped to LARGEINT. | -| Int256/UInt128/UInt256 | STRING | Doris does not support data types of such orders of magnitude so these will be mapped to STRING. | -| DECIMAL | DECIMAL | Data beyond the maximum decimal precision in Doris will be truncated. | -| Enum/IPv4/IPv6/UUID | STRING | Data of IPv4 and IPv6 type will be displayed with an extra `/` as a prefix. To remove the `/`, you can use the `split_part`function. | -| Other | UNSUPPORTED | | +| ClickHouse Type | Doris Type | Comment | +|---------------------------------------------------------|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------| +| Bool | BOOLEAN | | +| String | STRING | | +| Date/Date32 | DATEV2 | JDBC CATLOG uses Datev2 type default when connecting DORIS | +| DateTime/DateTime64 | DATETIMEV2 | JDBC CATLOG uses DateTimev2 type default when connecting DORIS | +| Float32 | FLOAT | | +| Float64 | DOUBLE | | +| Int8 | TINYINT | | +| Int16/UInt8 | SMALLINT | Doris does not support UNSIGNED data types so UInt8 will be mapped to SMALLINT. | +| Int32/UInt16 | INT | Doris does not support UNSIGNED data types so UInt16 will be mapped to INT. | +| Int64/Uint32 | BIGINT | Doris does not support UNSIGNED data types so UInt32 will be mapped to BIGINT. | +| Int128/UInt64 | LARGEINT | Doris does not support UNSIGNED data types so UInt64 will be mapped to LARGEINT. | +| Int256/UInt128/UInt256 | STRING | Doris does not support data types of such orders of magnitude so these will be mapped to STRING. | +| DECIMAL | DECIMAL/DECIMALV3/STRING | The Data type is based on the DECIMAL field's (precision, scale) and the `enable_decimal_conversion` configuration. | +| Enum/IPv4/IPv6/UUID | STRING | Data of IPv4 and IPv6 type will be displayed with an extra `/` as a prefix. To remove the `/`, you can use the `split_part`function. | +| <version since="dev" type="inline"> Array(T) </version> | ARRAY\<T\> | Array internal basic type adaptation logic refers to the preceding types. Nested types are not supported | +| Other | UNSUPPORTED | | ### Doris @@ -370,7 +371,7 @@ The transaction mechanism ensures the atomicity of data writing to JDBC External | TEXT | STRING | | |Other| UNSUPPORTED | -### SAP_HANA +### SAP HANA | SAP_HANA | Doris | Comment | |--------------|--------------------------|--------------------------------------------------------------------------------------------------------------------| diff --git a/docs/zh-CN/docs/lakehouse/external-table/jdbc.md b/docs/zh-CN/docs/lakehouse/external-table/jdbc.md index 0773d26ea7..4b04a1abec 100644 --- a/docs/zh-CN/docs/lakehouse/external-table/jdbc.md +++ b/docs/zh-CN/docs/lakehouse/external-table/jdbc.md @@ -169,9 +169,10 @@ PROPERTIES ( 目前只测试了这一个版本其他版本测试后补充 #### 5.ClickHouse测试 -| ClickHouse版本 | ClickHouse JDBC驱动版本 | -|----------| ------------------- | -| 22 | clickhouse-jdbc-0.3.2-patch11-all.jar | +| ClickHouse版本 | ClickHouse JDBC驱动版本 | +|--------------|---------------------------------------| +| 22 | clickhouse-jdbc-0.3.2-patch11-all.jar | +| 22 | clickhouse-jdbc-0.4.1-all.jar | #### 6.Sap_Hana测试 @@ -269,28 +270,31 @@ PROPERTIES ( ### ClickHouse -| ClickHouse | Doris | -|:----------:|:--------:| -| BOOLEAN | BOOLEAN | -| CHAR | CHAR | -| VARCHAR | VARCHAR | -| STRING | STRING | -| DATE | DATE | -| Float32 | FLOAT | -| Float64 | DOUBLE | -| Int8 | TINYINT | -| Int16 | SMALLINT | -| Int32 | INT | -| Int64 | BIGINT | -| Int128 | LARGEINT | -| DATETIME | DATETIME | -| DECIMAL | DECIMAL | +| ClickHouse | Doris | +|:--------------------------------------------------------:|:------------------------:| +| Boolean | BOOLEAN | +| String | STRING | +| Date/Date32 | DATE/DATEV2 | +| DateTime/DateTime64 | DATETIME/DATETIMEV2 | +| Float32 | FLOAT | +| Float64 | DOUBLE | +| Int8 | TINYINT | +| Int16/UInt8 | SMALLINT | +| Int32/UInt16 | INT | +| Int64/Uint32 | BIGINT | +| Int128/UInt64 | LARGEINT | +| Int256/UInt128/UInt256 | STRING | +| Decimal | DECIMAL/DECIMALV3/STRING | +| Enum/IPv4/IPv6/UUID | STRING | +| <version since="dev" type="inline"> Array(T) </version> | ARRAY\<T\> | **注意:** + +- <version since="dev" type="inline"> 对于ClickHouse里的Array类型,可用Doris的Array类型来匹配,Array内的基础类型匹配参考基础类型匹配规则即可,不支持嵌套Array </version> - 对于ClickHouse里的一些特殊类型,如UUID,IPv4,IPv6,Enum8可以用Doris的Varchar/String类型来匹配,但是在显示上IPv4,IPv6会额外在数据最前面显示一个`/`,需要自己用`split_part`函数处理 - 对于ClickHouse的Geo类型Point,无法进行匹配 -### SAP_HANA +### SAP HANA | SAP_HANA | Doris | |:------------:|:-------------------:| diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md b/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md index 0494443355..a0d0b7d437 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md @@ -328,23 +328,24 @@ set enable_odbc_transcation = true; ### Clickhouse -| ClickHouse Type | Doris Type | Comment | -|------------------------|------------|-----------------------------------------------------| -| Bool | BOOLEAN | | -| String | STRING | | -| Date/Date32 | DATE | | -| DateTime/DateTime64 | DATETIME | 对于超过了Doris最大的DateTime精度的数据,将截断处理 | -| Float32 | FLOAT | | -| Float64 | DOUBLE | | -| Int8 | TINYINT | | -| Int16/UInt8 | SMALLINT | Doris没有UNSIGNED数据类型,所以扩大一个数量级 | -| Int32/UInt16 | INT | Doris没有UNSIGNED数据类型,所以扩大一个数量级 | -| Int64/Uint32 | BIGINT | Doris没有UNSIGNED数据类型,所以扩大一个数量级 | -| Int128/UInt64 | LARGEINT | Doris没有UNSIGNED数据类型,所以扩大一个数量级 | -| Int256/UInt128/UInt256 | STRING | Doris没有这个数量级的数据类型,采用STRING处理 | -| DECIMAL | DECIMAL | 对于超过了Doris最大的Decimal精度的数据,将映射为STRING | -| Enum/IPv4/IPv6/UUID | STRING | 在显示上IPv4,IPv6会额外在数据最前面显示一个`/`,需要自己用`split_part`函数处理 | -|Other| UNSUPPORTED | +| ClickHouse Type | Doris Type | Comment | +|---------------------------------------------------------|--------------------------|---------------------------------------------------| +| Bool | BOOLEAN | | +| String | STRING | | +| Date/Date32 | DATEV2 | Jdbc Catlog连接Doris时默认使用DATEV2类型 | +| DateTime/DateTime64 | DATETIMEV2 | Jdbc Catlog连接Doris时默认使用DATETIMEV2类型 | +| Float32 | FLOAT | | +| Float64 | DOUBLE | | +| Int8 | TINYINT | | +| Int16/UInt8 | SMALLINT | Doris没有UNSIGNED数据类型,所以扩大一个数量级 | +| Int32/UInt16 | INT | Doris没有UNSIGNED数据类型,所以扩大一个数量级 | +| Int64/Uint32 | BIGINT | Doris没有UNSIGNED数据类型,所以扩大一个数量级 | +| Int128/UInt64 | LARGEINT | Doris没有UNSIGNED数据类型,所以扩大一个数量级 | +| Int256/UInt128/UInt256 | STRING | Doris没有这个数量级的数据类型,采用STRING处理 | +| DECIMAL | DECIMAL/DECIMALV3/STRING | 将根据Doris DECIMAL字段的(precision, scale)和`enable_decimal_conversion`开关选择用何种类型| +| Enum/IPv4/IPv6/UUID | STRING | 在显示上IPv4,IPv6会额外在数据最前面显示一个`/`,需要自己用`split_part`函数处理 | +| <version since="dev" type="inline"> Array(T) </version> | ARRAY\<T\> | Array内部类型适配逻辑参考上述类型,不支持嵌套类型 | +| Other | UNSUPPORTED | | ### Doris @@ -369,7 +370,7 @@ set enable_odbc_transcation = true; | TEXT | STRING | | |Other| UNSUPPORTED | -### SAP_HANA +### SAP HANA | SAP_HANA | Doris | Comment | |--------------|--------------------------|---------------------------------------------------------------------------------------| diff --git a/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java b/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java index 1310a89f03..32ef14f9ce 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java @@ -17,6 +17,7 @@ package org.apache.doris.external.jdbc; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.JdbcResource; import org.apache.doris.catalog.PrimitiveType; @@ -578,6 +579,11 @@ public class JdbcClient { return ScalarType.createStringType(); } else if (ckType.startsWith("DateTime")) { return ScalarType.createDatetimeV2Type(0); + } else if (ckType.startsWith("Array")) { + String cktype = ckType.substring(6, ckType.length() - 1); + fieldSchema.setDataTypeName(cktype); + Type type = clickhouseTypeToDoris(fieldSchema); + return ArrayType.create(type, true); } switch (ckType) { case "Bool": diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java b/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java index df30b8e1b5..9e9ea2cb8a 100644 --- a/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java +++ b/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java @@ -32,6 +32,8 @@ import java.lang.reflect.Array; import java.math.BigDecimal; import java.math.BigInteger; import java.math.RoundingMode; +import java.net.Inet4Address; +import java.net.Inet6Address; import java.net.MalformedURLException; import java.nio.charset.StandardCharsets; import java.sql.Connection; @@ -45,7 +47,11 @@ import java.time.LocalDate; import java.time.LocalDateTime; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.function.Function; public class JdbcExecutor { private static final Logger LOG = Logger.getLogger(JdbcExecutor.class); @@ -197,9 +203,37 @@ public class JdbcExecutor { Object[] columnData = (Object[]) obj; if (columnData[idx] instanceof String) { return (String) columnData[idx]; - } else { + } else if (columnData[idx] instanceof java.sql.Array) { return (java.sql.Array) columnData[idx]; - } + } else { + //For the ClickHouse array type, we need the concatenated string after toString + return convertClickHouseArray(columnData[idx]); + } + } + + private static final Map<Class<?>, Function<Object, String>> CK_ARRAY_CONVERTERS = new HashMap<>(); + + static { + CK_ARRAY_CONVERTERS.put(String[].class, res -> Arrays.toString((String[]) res)); + CK_ARRAY_CONVERTERS.put(byte[].class, res -> Arrays.toString((byte[]) res)); + CK_ARRAY_CONVERTERS.put(Byte[].class, res -> Arrays.toString((Byte[]) res)); + CK_ARRAY_CONVERTERS.put(LocalDate[].class, res -> Arrays.toString((LocalDate[]) res)); + CK_ARRAY_CONVERTERS.put(LocalDateTime[].class, res -> Arrays.toString((LocalDateTime[]) res)); + CK_ARRAY_CONVERTERS.put(float[].class, res -> Arrays.toString((float[]) res)); + CK_ARRAY_CONVERTERS.put(double[].class, res -> Arrays.toString((double[]) res)); + CK_ARRAY_CONVERTERS.put(short[].class, res -> Arrays.toString((short[]) res)); + CK_ARRAY_CONVERTERS.put(int[].class, res -> Arrays.toString((int[]) res)); + CK_ARRAY_CONVERTERS.put(long[].class, res -> Arrays.toString((long[]) res)); + CK_ARRAY_CONVERTERS.put(BigInteger[].class, res -> Arrays.toString((BigInteger[]) res)); + CK_ARRAY_CONVERTERS.put(BigDecimal[].class, res -> Arrays.toString((BigDecimal[]) res)); + CK_ARRAY_CONVERTERS.put(Inet4Address[].class, res -> Arrays.toString((Inet4Address[]) res)); + CK_ARRAY_CONVERTERS.put(Inet6Address[].class, res -> Arrays.toString((Inet6Address[]) res)); + CK_ARRAY_CONVERTERS.put(UUID[].class, res -> Arrays.toString((UUID[]) res)); + } + + public static Object convertClickHouseArray(Object obj) { + Function<Object, String> converter = CK_ARRAY_CONVERTERS.get(obj.getClass()); + return converter != null ? converter.apply(obj) : obj; } private void init(String driverUrl, String sql, int batchSize, String driverClass, String jdbcUrl, String jdbcUser, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org