This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e2e806a5e7 [improve](clickhouse jdbc) support clickhouse array type 
(#17993)
e2e806a5e7 is described below

commit e2e806a5e7fe6304597ed82a97584cf9215066df
Author: yongkang.zhong <zhong...@qq.com>
AuthorDate: Wed Mar 22 19:42:32 2023 +0800

    [improve](clickhouse jdbc) support clickhouse array type (#17993)
    
    In this PR, I match the array type of ClickHouse to the array type of 
Doris's jdbc external.
---
 be/src/vec/exec/vjdbc_connector.cpp                |  3 +-
 docs/en/docs/lakehouse/external-table/jdbc.md      | 46 ++++++++++++----------
 docs/en/docs/lakehouse/multi-catalog/jdbc.md       | 37 ++++++++---------
 docs/zh-CN/docs/lakehouse/external-table/jdbc.md   | 44 +++++++++++----------
 docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md    | 37 ++++++++---------
 .../org/apache/doris/external/jdbc/JdbcClient.java |  6 +++
 .../java/org/apache/doris/udf/JdbcExecutor.java    | 38 +++++++++++++++++-
 7 files changed, 131 insertions(+), 80 deletions(-)

diff --git a/be/src/vec/exec/vjdbc_connector.cpp 
b/be/src/vec/exec/vjdbc_connector.cpp
index e9caf93647..226904588d 100644
--- a/be/src/vec/exec/vjdbc_connector.cpp
+++ b/be/src/vec/exec/vjdbc_connector.cpp
@@ -313,7 +313,8 @@ Status JdbcConnector::_check_type(SlotDescriptor* 
slot_desc, const std::string&
         break;
     }
     case TYPE_ARRAY: {
-        if (type_str != "java.sql.Array" && type_str != "java.lang.String") {
+        if (type_str != "java.sql.Array" && type_str != "java.lang.String" &&
+            type_str != "java.lang.Object") {
             return Status::InternalError(error_msg);
         }
         if (!slot_desc->type().children[0].children.empty()) {
diff --git a/docs/en/docs/lakehouse/external-table/jdbc.md 
b/docs/en/docs/lakehouse/external-table/jdbc.md
index 5cfb8c1b9b..9d5d478e37 100644
--- a/docs/en/docs/lakehouse/external-table/jdbc.md
+++ b/docs/en/docs/lakehouse/external-table/jdbc.md
@@ -177,7 +177,8 @@ Test information on more versions will be provided in the 
future.
 
 | ClickHouse Version | ClickHouse JDBC Driver Version        |
 | ------------------ | ------------------------------------- |
-| 22                 | clickhouse-jdbc-0.3.2-patch11-all.jar |
+| 22           | clickhouse-jdbc-0.3.2-patch11-all.jar |
+| 22           | clickhouse-jdbc-0.4.1-all.jar         |
 
 #### 6.Sap_HanaTest
 
@@ -275,29 +276,32 @@ The followings list how data types in different databases 
are mapped in Doris.
 
 ### ClickHouse
 
-| ClickHouse |  Doris   |
-| :--------: | :------: |
-|  BOOLEAN   | BOOLEAN  |
-|    CHAR    |   CHAR   |
-|  VARCHAR   | VARCHAR  |
-|   STRING   |  STRING  |
-|    DATE    |   DATE   |
-|  Float32   |  FLOAT   |
-|  Float64   |  DOUBLE  |
-|    Int8    | TINYINT  |
-|   Int16    | SMALLINT |
-|   Int32    |   INT    |
-|   Int64    |  BIGINT  |
-|   Int128   | LARGEINT |
-|  DATETIME  | DATETIME |
-|  DECIMAL   | DECIMAL  |
-
-Note:
-
+|                       ClickHouse                        |          Doris     
      |
+|:-------------------------------------------------------:|:------------------------:|
+|                         Boolean                         |         BOOLEAN    
      |
+|                         String                          |          STRING    
      |
+|                       Date/Date32                       |       DATE/DATEV2  
      |
+|                   DateTime/DateTime64                   |   
DATETIME/DATETIMEV2    |
+|                         Float32                         |          FLOAT     
      |
+|                         Float64                         |          DOUBLE    
      |
+|                          Int8                           |         TINYINT    
      |
+|                       Int16/UInt8                       |         SMALLINT   
      |
+|                      Int32/UInt16                       |           INT      
      |
+|                      Int64/Uint32                       |          BIGINT    
      |
+|                      Int128/UInt64                      |         LARGEINT   
      |
+|                 Int256/UInt128/UInt256                  |          STRING    
      |
+|                         Decimal                         | 
DECIMAL/DECIMALV3/STRING |
+|                   Enum/IPv4/IPv6/UUID                   |          STRING    
      |
+| <version since="dev" type="inline"> Array(T) </version> |        ARRAY\<T\>  
      |
+
+
+**Note:**
+
+- <version since="dev" type="inline"> For Array types in ClickHouse, use 
Doris's Array type to match them. For basic types in an Array, see Basic type 
matching rules. Nested arrays are not supported. </version>
 - Some data types in ClickHouse, such as UUID, IPv4, IPv6, and Enum8, will be 
mapped to Varchar/String in Doris. IPv4 and IPv6 will be displayed with an `/` 
as a prefix. You can use the `split_part` function to remove the `/` .
 - The Point Geo type in ClickHouse cannot be mapped in Doris by far. 
 
-### SAP_HANA
+### SAP HANA
 
 |   SAP_HANA   |        Doris        |
 |:------------:|:-------------------:|
diff --git a/docs/en/docs/lakehouse/multi-catalog/jdbc.md 
b/docs/en/docs/lakehouse/multi-catalog/jdbc.md
index c96db6df6e..758986dcac 100644
--- a/docs/en/docs/lakehouse/multi-catalog/jdbc.md
+++ b/docs/en/docs/lakehouse/multi-catalog/jdbc.md
@@ -329,23 +329,24 @@ The transaction mechanism ensures the atomicity of data 
writing to JDBC External
 
 ### Clickhouse
 
-| ClickHouse Type        | Doris Type  | Comment                               
                       |
-| ---------------------- | ----------- | 
------------------------------------------------------------ |
-| Bool                   | BOOLEAN     |                                       
                       |
-| String                 | STRING      |                                       
                       |
-| Date/Date32            | DATE        |                                       
                       |
-| DateTime/DateTime64    | DATETIME    | Data beyond the maximum precision of 
DateTime in Doris will be truncated. |
-| Float32                | FLOAT       |                                       
                       |
-| Float64                | DOUBLE      |                                       
                       |
-| Int8                   | TINYINT     |                                       
                       |
-| Int16/UInt8            | SMALLINT    | Doris does not support UNSIGNED data 
types so UInt8 will be mapped to SMALLINT. |
-| Int32/UInt16           | INT         | Doris does not support UNSIGNED data 
types so UInt16 will be mapped to INT. |
-| Int64/Uint32           | BIGINT      | Doris does not support UNSIGNED data 
types so UInt32 will be mapped to BIGINT. |
-| Int128/UInt64          | LARGEINT    | Doris does not support UNSIGNED data 
types so UInt64 will be mapped to LARGEINT. |
-| Int256/UInt128/UInt256 | STRING      | Doris does not support data types of 
such orders of magnitude so these will be mapped to STRING. |
-| DECIMAL                | DECIMAL     | Data beyond the maximum decimal 
precision in Doris will be truncated. |
-| Enum/IPv4/IPv6/UUID    | STRING      | Data of IPv4 and IPv6 type will be 
displayed with an extra `/` as a prefix. To remove the `/`, you can use the 
`split_part`function. |
-| Other                  | UNSUPPORTED |                                       
                       |
+| ClickHouse Type                                         | Doris Type         
      | Comment                                                                 
                                                             |
+|---------------------------------------------------------|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------|
+| Bool                                                    | BOOLEAN            
      |                                                                         
                                                             |
+| String                                                  | STRING             
      |                                                                         
                                                             |
+| Date/Date32                                             | DATEV2             
      | JDBC CATLOG uses Datev2 type default when connecting DORIS              
                                                             |
+| DateTime/DateTime64                                     | DATETIMEV2         
      | JDBC CATLOG uses DateTimev2 type default when connecting DORIS          
                                                             |
+| Float32                                                 | FLOAT              
      |                                                                         
                                                             |
+| Float64                                                 | DOUBLE             
      |                                                                         
                                                             |
+| Int8                                                    | TINYINT            
      |                                                                         
                                                             |
+| Int16/UInt8                                             | SMALLINT           
      | Doris does not support UNSIGNED data types so UInt8 will be mapped to 
SMALLINT.                                                      |
+| Int32/UInt16                                            | INT                
      | Doris does not support UNSIGNED data types so UInt16 will be mapped to 
INT.                                                          |
+| Int64/Uint32                                            | BIGINT             
      | Doris does not support UNSIGNED data types so UInt32 will be mapped to 
BIGINT.                                                       |
+| Int128/UInt64                                           | LARGEINT           
      | Doris does not support UNSIGNED data types so UInt64 will be mapped to 
LARGEINT.                                                     |
+| Int256/UInt128/UInt256                                  | STRING             
      | Doris does not support data types of such orders of magnitude so these 
will be mapped to STRING.                                     |
+| DECIMAL                                                 | 
DECIMAL/DECIMALV3/STRING | The Data type is based on the DECIMAL field's 
(precision, scale) and the `enable_decimal_conversion` configuration.           
       |
+| Enum/IPv4/IPv6/UUID                                     | STRING             
      | Data of IPv4 and IPv6 type will be displayed with an extra `/` as a 
prefix. To remove the `/`, you can use the `split_part`function. |
+| <version since="dev" type="inline"> Array(T) </version> | ARRAY\<T\>         
      | Array internal basic type adaptation logic refers to the preceding 
types. Nested types are not supported                             |
+| Other                                                   | UNSUPPORTED        
      |                                                                         
                                                             |
 
 ### Doris
 
@@ -370,7 +371,7 @@ The transaction mechanism ensures the atomicity of data 
writing to JDBC External
 | TEXT | STRING | |
 |Other| UNSUPPORTED |
 
-### SAP_HANA
+### SAP HANA
 
 | SAP_HANA     | Doris                    | Comment                            
                                                                                
|
 
|--------------|--------------------------|--------------------------------------------------------------------------------------------------------------------|
diff --git a/docs/zh-CN/docs/lakehouse/external-table/jdbc.md 
b/docs/zh-CN/docs/lakehouse/external-table/jdbc.md
index 0773d26ea7..4b04a1abec 100644
--- a/docs/zh-CN/docs/lakehouse/external-table/jdbc.md
+++ b/docs/zh-CN/docs/lakehouse/external-table/jdbc.md
@@ -169,9 +169,10 @@ PROPERTIES (
 目前只测试了这一个版本其他版本测试后补充
 
 #### 5.ClickHouse测试
-| ClickHouse版本 | ClickHouse JDBC驱动版本 |
-|----------| ------------------- |
-| 22       | clickhouse-jdbc-0.3.2-patch11-all.jar |
+| ClickHouse版本 | ClickHouse JDBC驱动版本                   |
+|--------------|---------------------------------------|
+| 22           | clickhouse-jdbc-0.3.2-patch11-all.jar |
+| 22           | clickhouse-jdbc-0.4.1-all.jar         |
 
 #### 6.Sap_Hana测试
 
@@ -269,28 +270,31 @@ PROPERTIES (
 
 ### ClickHouse
 
-| ClickHouse |  Doris   |
-|:----------:|:--------:|
-|  BOOLEAN   | BOOLEAN  |
-|    CHAR    |   CHAR   |
-|  VARCHAR   | VARCHAR  |
-|   STRING   |  STRING  |
-|    DATE    |   DATE   |
-|  Float32   |  FLOAT   |
-|  Float64   |  DOUBLE  |
-|    Int8    | TINYINT  |
-|   Int16    | SMALLINT |
-|   Int32    |   INT    |
-|   Int64    |  BIGINT  |
-|   Int128   | LARGEINT |
-|  DATETIME  | DATETIME |
-|  DECIMAL   | DECIMAL  |
+|                        ClickHouse                        |          Doris    
       |
+|:--------------------------------------------------------:|:------------------------:|
+|                         Boolean                          |         BOOLEAN   
       |
+|                          String                          |          STRING   
       |
+|                       Date/Date32                        |       DATE/DATEV2 
       |
+|                   DateTime/DateTime64                    |   
DATETIME/DATETIMEV2    |
+|                         Float32                          |          FLOAT    
       |
+|                         Float64                          |          DOUBLE   
       |
+|                           Int8                           |         TINYINT   
       |
+|                       Int16/UInt8                        |         SMALLINT  
       |
+|                       Int32/UInt16                       |           INT     
       |
+|                       Int64/Uint32                       |          BIGINT   
       |
+|                      Int128/UInt64                       |         LARGEINT  
       |
+|                  Int256/UInt128/UInt256                  |          STRING   
       |
+|                         Decimal                          | 
DECIMAL/DECIMALV3/STRING |
+|                   Enum/IPv4/IPv6/UUID                    |          STRING   
       |
+| <version since="dev" type="inline"> Array(T)  </version> |        ARRAY\<T\> 
       |
 
 **注意:**
+
+- <version since="dev" type="inline"> 
对于ClickHouse里的Array类型,可用Doris的Array类型来匹配,Array内的基础类型匹配参考基础类型匹配规则即可,不支持嵌套Array 
</version>
 - 
对于ClickHouse里的一些特殊类型,如UUID,IPv4,IPv6,Enum8可以用Doris的Varchar/String类型来匹配,但是在显示上IPv4,IPv6会额外在数据最前面显示一个`/`,需要自己用`split_part`函数处理
 - 对于ClickHouse的Geo类型Point,无法进行匹配
 
-### SAP_HANA
+### SAP HANA
 
 |   SAP_HANA   |        Doris        |
 |:------------:|:-------------------:|
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md 
b/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md
index 0494443355..a0d0b7d437 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/jdbc.md
@@ -328,23 +328,24 @@ set enable_odbc_transcation = true;
 
 ### Clickhouse
 
-| ClickHouse Type        | Doris Type | Comment                                
             |
-|------------------------|------------|-----------------------------------------------------|
-| Bool                   | BOOLEAN    |                                        
             |
-| String                 | STRING     |                                        
             |
-| Date/Date32            | DATE       |                                        
             |
-| DateTime/DateTime64    | DATETIME   | 对于超过了Doris最大的DateTime精度的数据,将截断处理       
             |
-| Float32                | FLOAT      |                                        
             |
-| Float64                | DOUBLE     |                                        
             |
-| Int8                   | TINYINT    |                                        
             |
-| Int16/UInt8            | SMALLINT   | Doris没有UNSIGNED数据类型,所以扩大一个数量级          
             |
-| Int32/UInt16           | INT        | Doris没有UNSIGNED数据类型,所以扩大一个数量级          
             |
-| Int64/Uint32           | BIGINT     | Doris没有UNSIGNED数据类型,所以扩大一个数量级          
             |
-| Int128/UInt64          | LARGEINT   | Doris没有UNSIGNED数据类型,所以扩大一个数量级          
             |
-| Int256/UInt128/UInt256 | STRING     | Doris没有这个数量级的数据类型,采用STRING处理           
             |
-| DECIMAL                | DECIMAL    | 对于超过了Doris最大的Decimal精度的数据,将映射为STRING   
             |
-| Enum/IPv4/IPv6/UUID    | STRING     | 
在显示上IPv4,IPv6会额外在数据最前面显示一个`/`,需要自己用`split_part`函数处理 |
-|Other| UNSUPPORTED |
+| ClickHouse Type                                         | Doris Type         
      | Comment                                           |
+|---------------------------------------------------------|--------------------------|---------------------------------------------------|
+| Bool                                                    | BOOLEAN            
      |                                                   |
+| String                                                  | STRING             
      |                                                   |
+| Date/Date32                                             | DATEV2             
      | Jdbc Catlog连接Doris时默认使用DATEV2类型                   |
+| DateTime/DateTime64                                     | DATETIMEV2         
      | Jdbc Catlog连接Doris时默认使用DATETIMEV2类型               |
+| Float32                                                 | FLOAT              
      |                                                   |
+| Float64                                                 | DOUBLE             
      |                                                   |
+| Int8                                                    | TINYINT            
      |                                                   |
+| Int16/UInt8                                             | SMALLINT           
      | Doris没有UNSIGNED数据类型,所以扩大一个数量级                     |
+| Int32/UInt16                                            | INT                
      | Doris没有UNSIGNED数据类型,所以扩大一个数量级                     |
+| Int64/Uint32                                            | BIGINT             
      | Doris没有UNSIGNED数据类型,所以扩大一个数量级                     |
+| Int128/UInt64                                           | LARGEINT           
      | Doris没有UNSIGNED数据类型,所以扩大一个数量级                     |
+| Int256/UInt128/UInt256                                  | STRING             
      | Doris没有这个数量级的数据类型,采用STRING处理                      |
+| DECIMAL                                                 | 
DECIMAL/DECIMALV3/STRING | 将根据Doris DECIMAL字段的(precision, 
scale)和`enable_decimal_conversion`开关选择用何种类型|
+| Enum/IPv4/IPv6/UUID                                     | STRING             
      | 在显示上IPv4,IPv6会额外在数据最前面显示一个`/`,需要自己用`split_part`函数处理 |
+| <version since="dev" type="inline"> Array(T) </version> | ARRAY\<T\>         
      | Array内部类型适配逻辑参考上述类型,不支持嵌套类型        |
+| Other                                                   | UNSUPPORTED        
      |                                                   |
 
 ### Doris
 
@@ -369,7 +370,7 @@ set enable_odbc_transcation = true;
 | TEXT | STRING | |
 |Other| UNSUPPORTED |
 
-### SAP_HANA
+### SAP HANA
 
 | SAP_HANA     | Doris                    | Comment                            
                                                   |
 
|--------------|--------------------------|---------------------------------------------------------------------------------------|
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java 
b/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java
index 1310a89f03..32ef14f9ce 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/external/jdbc/JdbcClient.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.external.jdbc;
 
+import org.apache.doris.catalog.ArrayType;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.JdbcResource;
 import org.apache.doris.catalog.PrimitiveType;
@@ -578,6 +579,11 @@ public class JdbcClient {
             return ScalarType.createStringType();
         } else if (ckType.startsWith("DateTime")) {
             return ScalarType.createDatetimeV2Type(0);
+        } else if (ckType.startsWith("Array")) {
+            String cktype = ckType.substring(6, ckType.length() - 1);
+            fieldSchema.setDataTypeName(cktype);
+            Type type = clickhouseTypeToDoris(fieldSchema);
+            return ArrayType.create(type, true);
         }
         switch (ckType) {
             case "Bool":
diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java 
b/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
index df30b8e1b5..9e9ea2cb8a 100644
--- a/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
+++ b/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java
@@ -32,6 +32,8 @@ import java.lang.reflect.Array;
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.math.RoundingMode;
+import java.net.Inet4Address;
+import java.net.Inet6Address;
 import java.net.MalformedURLException;
 import java.nio.charset.StandardCharsets;
 import java.sql.Connection;
@@ -45,7 +47,11 @@ import java.time.LocalDate;
 import java.time.LocalDateTime;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.function.Function;
 
 public class JdbcExecutor {
     private static final Logger LOG = Logger.getLogger(JdbcExecutor.class);
@@ -197,9 +203,37 @@ public class JdbcExecutor {
         Object[] columnData = (Object[]) obj;
         if (columnData[idx] instanceof String) {
             return (String) columnData[idx];
-        } else {
+        } else if (columnData[idx] instanceof java.sql.Array) {
             return (java.sql.Array) columnData[idx];
-        }
+        } else {
+            //For the ClickHouse array type, we need the concatenated string 
after toString
+            return convertClickHouseArray(columnData[idx]);
+        }
+    }
+
+    private static final Map<Class<?>, Function<Object, String>> 
CK_ARRAY_CONVERTERS = new HashMap<>();
+
+    static {
+        CK_ARRAY_CONVERTERS.put(String[].class, res -> 
Arrays.toString((String[]) res));
+        CK_ARRAY_CONVERTERS.put(byte[].class, res -> Arrays.toString((byte[]) 
res));
+        CK_ARRAY_CONVERTERS.put(Byte[].class, res -> Arrays.toString((Byte[]) 
res));
+        CK_ARRAY_CONVERTERS.put(LocalDate[].class, res -> 
Arrays.toString((LocalDate[]) res));
+        CK_ARRAY_CONVERTERS.put(LocalDateTime[].class, res -> 
Arrays.toString((LocalDateTime[]) res));
+        CK_ARRAY_CONVERTERS.put(float[].class, res -> 
Arrays.toString((float[]) res));
+        CK_ARRAY_CONVERTERS.put(double[].class, res -> 
Arrays.toString((double[]) res));
+        CK_ARRAY_CONVERTERS.put(short[].class, res -> 
Arrays.toString((short[]) res));
+        CK_ARRAY_CONVERTERS.put(int[].class, res -> Arrays.toString((int[]) 
res));
+        CK_ARRAY_CONVERTERS.put(long[].class, res -> Arrays.toString((long[]) 
res));
+        CK_ARRAY_CONVERTERS.put(BigInteger[].class, res -> 
Arrays.toString((BigInteger[]) res));
+        CK_ARRAY_CONVERTERS.put(BigDecimal[].class, res -> 
Arrays.toString((BigDecimal[]) res));
+        CK_ARRAY_CONVERTERS.put(Inet4Address[].class, res -> 
Arrays.toString((Inet4Address[]) res));
+        CK_ARRAY_CONVERTERS.put(Inet6Address[].class, res -> 
Arrays.toString((Inet6Address[]) res));
+        CK_ARRAY_CONVERTERS.put(UUID[].class, res -> Arrays.toString((UUID[]) 
res));
+    }
+
+    public static Object convertClickHouseArray(Object obj) {
+        Function<Object, String> converter = 
CK_ARRAY_CONVERTERS.get(obj.getClass());
+        return converter != null ? converter.apply(obj) : obj;
     }
 
     private void init(String driverUrl, String sql, int batchSize, String 
driverClass, String jdbcUrl, String jdbcUser,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to