This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit 8689265ea7f82db17a719286dde156f2a33ec617 Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Thu Dec 8 10:27:47 2022 +0800 [Vectorized](jdbc) add check type for jdbc table (#14501) --- be/src/vec/exec/vjdbc_connector.cpp | 131 +++++++++++++++++++++ be/src/vec/exec/vjdbc_connector.h | 4 + .../docs/ecosystem/external-table/jdbc-of-doris.md | 31 +++-- .../docs/ecosystem/external-table/jdbc-of-doris.md | 30 ++--- .../java/org/apache/doris/udf/JdbcExecutor.java | 14 +++ .../suites/jdbc_p0/test_jdbc_query_pg.groovy | 4 +- 6 files changed, 181 insertions(+), 33 deletions(-) diff --git a/be/src/vec/exec/vjdbc_connector.cpp b/be/src/vec/exec/vjdbc_connector.cpp index f3a7688d38..ce171ad569 100644 --- a/be/src/vec/exec/vjdbc_connector.cpp +++ b/be/src/vec/exec/vjdbc_connector.cpp @@ -22,6 +22,7 @@ #include "gen_cpp/Types_types.h" #include "gutil/strings/substitute.h" #include "jni.h" +#include "runtime/define_primitive_type.h" #include "runtime/user_function_cache.h" #include "util/jni-util.h" #include "vec/columns/column_nullable.h" @@ -34,6 +35,7 @@ const char* JDBC_EXECUTOR_CTOR_SIGNATURE = "([B)V"; const char* JDBC_EXECUTOR_WRITE_SIGNATURE = "(Ljava/lang/String;)I"; const char* JDBC_EXECUTOR_HAS_NEXT_SIGNATURE = "()Z"; const char* JDBC_EXECUTOR_GET_BLOCK_SIGNATURE = "(I)Ljava/util/List;"; +const char* JDBC_EXECUTOR_GET_TYPES_SIGNATURE = "()Ljava/util/List;"; const char* JDBC_EXECUTOR_CLOSE_SIGNATURE = "()V"; const char* JDBC_EXECUTOR_CONVERT_DATE_SIGNATURE = "(Ljava/lang/Object;)J"; const char* JDBC_EXECUTOR_CONVERT_DATETIME_SIGNATURE = "(Ljava/lang/Object;)J"; @@ -172,6 +174,125 @@ Status JdbcConnector::query() { if (colunm_count != materialize_num) { return Status::InternalError("input and output column num not equal of jdbc query."); } + LOG(INFO) << "JdbcConnector::query has exec success: " << _sql_str; + RETURN_IF_ERROR(_check_column_type()); + return Status::OK(); +} + +Status JdbcConnector::_check_column_type() { + JNIEnv* env = nullptr; + RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); + jobject type_lists = + env->CallNonvirtualObjectMethod(_executor_obj, _executor_clazz, _executor_get_types_id); + auto column_size = _tuple_desc->slots().size(); + for (int column_index = 0, materialized_column_index = 0; column_index < column_size; + ++column_index) { + auto slot_desc = _tuple_desc->slots()[column_index]; + if (!slot_desc->is_materialized()) { + continue; + } + jobject column_type = + env->CallObjectMethod(type_lists, _executor_get_list_id, materialized_column_index); + + const std::string& type_str = _jobject_to_string(env, column_type); + RETURN_IF_ERROR(_check_type(slot_desc, type_str)); + env->DeleteLocalRef(column_type); + materialized_column_index++; + } + env->DeleteLocalRef(type_lists); + return JniUtil::GetJniExceptionMsg(env); +} +/* type mapping: https://doris.apache.org/zh-CN/docs/dev/ecosystem/external-table/jdbc-of-doris?_highlight=jdbc + +Doris MYSQL PostgreSQL Oracle SQLServer + +BOOLEAN java.lang.Boolean java.lang.Boolean java.lang.Boolean +TINYINT java.lang.Integer java.lang.Short +SMALLINT java.lang.Integer java.lang.Integer java.math.BigDecimal java.lang.Short +INT java.lang.Integer java.lang.Integer java.math.BigDecimal java.lang.Integer +BIGINT java.lang.Long java.lang.Long java.lang.Long +LARGET java.math.BigInteger +DECIMAL java.math.BigDecimal java.math.BigDecimal java.math.BigDecimal java.math.BigDecimal +VARCHAR java.lang.String java.lang.String java.lang.String java.lang.String +DOUBLE java.lang.Double java.lang.Double java.lang.Double java.lang.Double +FLOAT java.lang.Float java.lang.Float java.lang.Float +DATE java.sql.Date java.sql.Date java.sql.Date +DATETIME java.sql.Timestamp java.sql.Timestamp java.sql.Timestamp java.sql.Timestamp + +NOTE: because oracle always use number(p,s) to create all numerical type, so it's java type maybe java.math.BigDecimal +*/ + +Status JdbcConnector::_check_type(SlotDescriptor* slot_desc, const std::string& type_str) { + const std::string error_msg = fmt::format( + "Fail to convert jdbc type of {} to doris type {} on column: {}. You need to " + "check this column type between external table and doris table.", + type_str, slot_desc->type().debug_string(), slot_desc->col_name()); + switch (slot_desc->type().type) { + case TYPE_BOOLEAN: { + if (type_str != "java.lang.Boolean" && type_str != "java.math.BigDecimal") { + return Status::InternalError(error_msg); + } + break; + } + case TYPE_TINYINT: + case TYPE_SMALLINT: + case TYPE_INT: { + if (type_str != "java.lang.Short" && type_str != "java.lang.Integer" && + type_str != "java.math.BigDecimal") { + return Status::InternalError(error_msg); + } + break; + } + case TYPE_BIGINT: + case TYPE_LARGEINT: { + if (type_str != "java.lang.Long" && type_str != "java.math.BigDecimal" && + type_str != "java.math.BigInteger") { + return Status::InternalError(error_msg); + } + break; + } + case TYPE_FLOAT: { + if (type_str != "java.lang.Float" && type_str != "java.math.BigDecimal") { + return Status::InternalError(error_msg); + } + break; + } + case TYPE_DOUBLE: { + if (type_str != "java.lang.Double" && type_str != "java.math.BigDecimal") { + return Status::InternalError(error_msg); + } + break; + } + case TYPE_CHAR: + case TYPE_VARCHAR: + case TYPE_STRING: { + //now here break directly + break; + } + case TYPE_DATE: + case TYPE_DATEV2: + case TYPE_TIMEV2: + case TYPE_DATETIME: + case TYPE_DATETIMEV2: { + if (type_str != "java.sql.Timestamp" && type_str != "java.time.LocalDateTime" && + type_str != "java.sql.Date") { + return Status::InternalError(error_msg); + } + break; + } + case TYPE_DECIMALV2: + case TYPE_DECIMAL32: + case TYPE_DECIMAL64: + case TYPE_DECIMAL128I: { + if (type_str != "java.math.BigDecimal") { + return Status::InternalError(error_msg); + } + break; + } + default: { + return Status::InternalError(error_msg); + } + } return Status::OK(); } @@ -262,6 +383,8 @@ Status JdbcConnector::_register_func_id(JNIEnv* env) { _executor_finish_trans_id)); RETURN_IF_ERROR(register_id(_executor_clazz, "rollbackTrans", JDBC_EXECUTOR_TRANSACTION_SIGNATURE, _executor_abort_trans_id)); + RETURN_IF_ERROR(register_id(_executor_clazz, "getResultColumnTypeNames", + JDBC_EXECUTOR_GET_TYPES_SIGNATURE, _executor_get_types_id)); return Status::OK(); } @@ -314,6 +437,14 @@ Status JdbcConnector::_convert_column_data(JNIEnv* env, jobject jobj, reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(num); break; } + case TYPE_LARGEINT: { + StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; + std::string data = _jobject_to_string(env, jobj); + __int128 num = + StringParser::string_to_int<__int128>(data.data(), data.size(), &parse_result); + reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(num); + break; + } case TYPE_DECIMALV2: { std::string data = _jobject_to_string(env, jobj); DecimalV2Value decimal_slot; diff --git a/be/src/vec/exec/vjdbc_connector.h b/be/src/vec/exec/vjdbc_connector.h index f3575dfd52..e6da1a0151 100644 --- a/be/src/vec/exec/vjdbc_connector.h +++ b/be/src/vec/exec/vjdbc_connector.h @@ -19,6 +19,7 @@ #include <jni.h> +#include "common/status.h" #include "exec/table_connector.h" namespace doris { @@ -60,6 +61,8 @@ public: private: Status _register_func_id(JNIEnv* env); + Status _check_column_type(); + Status _check_type(SlotDescriptor*, const std::string& type_str); Status _convert_column_data(JNIEnv* env, jobject jobj, const SlotDescriptor* slot_desc, vectorized::IColumn* column_ptr); std::string _jobject_to_string(JNIEnv* env, jobject jobj); @@ -78,6 +81,7 @@ private: jmethodID _executor_read_id; jmethodID _executor_has_next_id; jmethodID _executor_get_blocks_id; + jmethodID _executor_get_types_id; jmethodID _executor_close_id; jmethodID _executor_get_list_id; jmethodID _executor_get_list_size_id; diff --git a/docs/en/docs/ecosystem/external-table/jdbc-of-doris.md b/docs/en/docs/ecosystem/external-table/jdbc-of-doris.md index 552cd7e848..8d19f7ffab 100644 --- a/docs/en/docs/ecosystem/external-table/jdbc-of-doris.md +++ b/docs/en/docs/ecosystem/external-table/jdbc-of-doris.md @@ -162,16 +162,16 @@ There are different data types among different databases. Here is a list of the | MySQL | Doris | | :------: | :------: | | BOOLEAN | BOOLEAN | -| CHAR | CHAR | -| VARCHAR | VARCHAR | -| DATE | DATE | -| FLOAT | FLOAT | | TINYINT | TINYINT | | SMALLINT | SMALLINT | | INT | INT | | BIGINT | BIGINT | -| DOUBLE | DOUBLE | +|BIGINT UNSIGNED|LARGEINT| +| VARCHAR | VARCHAR | +| DATE | DATE | +| FLOAT | FLOAT | | DATETIME | DATETIME | +| DOUBLE | DOUBLE | | DECIMAL | DECIMAL | @@ -180,43 +180,42 @@ There are different data types among different databases. Here is a list of the | PostgreSQL | Doris | | :--------------: | :------: | | BOOLEAN | BOOLEAN | -| CHAR | CHAR | -| VARCHAR | VARCHAR | -| DATE | DATE | -| REAL | FLOAT | | SMALLINT | SMALLINT | | INT | INT | | BIGINT | BIGINT | -| DOUBLE PRECISION | DOUBLE | +| VARCHAR | VARCHAR | +| DATE | DATE | | TIMESTAMP | DATETIME | +| REAL | FLOAT | +| FLOAT | DOUBLE | | DECIMAL | DECIMAL | ### Oracle | Oracle | Doris | | :------: | :------: | -| CHAR | CHAR | | VARCHAR | VARCHAR | | DATE | DATETIME | | SMALLINT | SMALLINT | | INT | INT | +| REAL | FLOAT | +| FLOAT | DOUBLE | | NUMBER | DECIMAL | - ### SQL server | SQLServer | Doris | | :-------: | :------: | | BIT | BOOLEAN | -| CHAR | CHAR | -| VARCHAR | VARCHAR | -| DATE | DATE | -| REAL | FLOAT | | TINYINT | TINYINT | | SMALLINT | SMALLINT | | INT | INT | | BIGINT | BIGINT | +| VARCHAR | VARCHAR | +| DATE | DATE | | DATETIME | DATETIME | +| REAL | FLOAT | +| FLOAT | DOUBLE | | DECIMAL | DECIMAL | ### ClickHouse diff --git a/docs/zh-CN/docs/ecosystem/external-table/jdbc-of-doris.md b/docs/zh-CN/docs/ecosystem/external-table/jdbc-of-doris.md index d333e8850f..4ef47e1d66 100644 --- a/docs/zh-CN/docs/ecosystem/external-table/jdbc-of-doris.md +++ b/docs/zh-CN/docs/ecosystem/external-table/jdbc-of-doris.md @@ -165,16 +165,16 @@ PROPERTIES ( | MySQL | Doris | | :------: | :------: | | BOOLEAN | BOOLEAN | -| CHAR | CHAR | -| VARCHAR | VARCHAR | -| DATE | DATE | -| FLOAT | FLOAT | | TINYINT | TINYINT | | SMALLINT | SMALLINT | | INT | INT | | BIGINT | BIGINT | -| DOUBLE | DOUBLE | +|BIGINT UNSIGNED|LARGEINT| +| VARCHAR | VARCHAR | +| DATE | DATE | +| FLOAT | FLOAT | | DATETIME | DATETIME | +| DOUBLE | DOUBLE | | DECIMAL | DECIMAL | @@ -183,26 +183,26 @@ PROPERTIES ( | PostgreSQL | Doris | | :--------------: | :------: | | BOOLEAN | BOOLEAN | -| CHAR | CHAR | -| VARCHAR | VARCHAR | -| DATE | DATE | -| REAL | FLOAT | | SMALLINT | SMALLINT | | INT | INT | | BIGINT | BIGINT | -| DOUBLE PRECISION | DOUBLE | +| VARCHAR | VARCHAR | +| DATE | DATE | | TIMESTAMP | DATETIME | +| REAL | FLOAT | +| FLOAT | DOUBLE | | DECIMAL | DECIMAL | ### Oracle | Oracle | Doris | | :------: | :------: | -| CHAR | CHAR | | VARCHAR | VARCHAR | | DATE | DATETIME | | SMALLINT | SMALLINT | | INT | INT | +| REAL | FLOAT | +| FLOAT | DOUBLE | | NUMBER | DECIMAL | @@ -211,15 +211,15 @@ PROPERTIES ( | SQLServer | Doris | | :-------: | :------: | | BIT | BOOLEAN | -| CHAR | CHAR | -| VARCHAR | VARCHAR | -| DATE | DATE | -| REAL | FLOAT | | TINYINT | TINYINT | | SMALLINT | SMALLINT | | INT | INT | | BIGINT | BIGINT | +| VARCHAR | VARCHAR | +| DATE | DATE | | DATETIME | DATETIME | +| REAL | FLOAT | +| FLOAT | DOUBLE | | DECIMAL | DECIMAL | ### ClickHouse diff --git a/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java b/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java index 0a267bd9a6..9539f65aba 100644 --- a/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java +++ b/fe/java-udf/src/main/java/org/apache/doris/udf/JdbcExecutor.java @@ -53,6 +53,7 @@ public class JdbcExecutor { private ResultSetMetaData resultSetMetaData = null; // Use HikariDataSource to help us manage the JDBC connections. private HikariDataSource dataSource = null; + private List<String> resultColumnTypeNames = null; public JdbcExecutor(byte[] thriftParams) throws Exception { TJdbcExecutorCtorParams request = new TJdbcExecutorCtorParams(); @@ -110,6 +111,19 @@ public class JdbcExecutor { } } + public List<String> getResultColumnTypeNames() throws UdfRuntimeException { + try { + int count = resultSetMetaData.getColumnCount(); + resultColumnTypeNames = new ArrayList<>(count); + for (int i = 0; i < count; ++i) { + resultColumnTypeNames.add(resultSetMetaData.getColumnClassName(i + 1)); + } + return resultColumnTypeNames; + } catch (SQLException e) { + throw new UdfRuntimeException("JDBC executor getResultColumnTypeNames has error: ", e); + } + } + public void openTrans() throws UdfRuntimeException { try { if (conn != null) { diff --git a/regression-test/suites/jdbc_p0/test_jdbc_query_pg.groovy b/regression-test/suites/jdbc_p0/test_jdbc_query_pg.groovy index b84ee57072..a18156527c 100644 --- a/regression-test/suites/jdbc_p0/test_jdbc_query_pg.groovy +++ b/regression-test/suites/jdbc_p0/test_jdbc_query_pg.groovy @@ -55,7 +55,7 @@ suite("test_jdbc_query_pg", "p0") { k2 char(100), k3 varchar(128), k4 date, - k5 double, + k5 float, k6 smallint, k7 int, k8 bigint, @@ -209,7 +209,7 @@ suite("test_jdbc_query_pg", "p0") { `m_time` DATETIME NULL, `app_id` BIGINT(20) NULL, `t_id` BIGINT(20) NULL, - `deleted` TEXT NULL, + `deleted` boolean NULL, `w_t_s` DATETIME NULL, `rf_id` TEXT NULL, `e_info` TEXT NULL, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org