This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 53ee740b88b [enhance](hive) Add regression-test cases for hive text ddl and hive text insert and fix reading null string bug #42200 (#42272) 53ee740b88b is described below commit 53ee740b88b374bb98bd3ff23b87d22e2606225f Author: Rayner Chen <morning...@163.com> AuthorDate: Tue Oct 22 23:42:49 2024 +0800 [enhance](hive) Add regression-test cases for hive text ddl and hive text insert and fix reading null string bug #42200 (#42272) cherry pick from #42200 Co-authored-by: Socrates <suxiaogang...@icloud.com> --- be/src/vec/exec/format/csv/csv_reader.cpp | 2 +- .../scripts/create_preinstalled_scripts/run63.hql | 18 ++- .../hive/ddl/test_hive_ddl_text_format.out | 57 +++++++ .../hive/ddl/test_hive_ddl_text_format.groovy | 177 +++++++++++++++------ 4 files changed, 200 insertions(+), 54 deletions(-) diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp index 0583b74d735..bf0e543d650 100644 --- a/be/src/vec/exec/format/csv/csv_reader.cpp +++ b/be/src/vec/exec/format/csv/csv_reader.cpp @@ -622,7 +622,7 @@ template <bool from_json> Status CsvReader::deserialize_nullable_string(IColumn& column, Slice& slice) { auto& null_column = assert_cast<ColumnNullable&>(column); if (!(from_json && _options.converted_from_string && slice.trim_double_quotes())) { - if (slice.size == 2 && slice[0] == '\\' && slice[1] == 'N') { + if (slice.compare(Slice(_options.null_format, _options.null_len)) == 0) { null_column.insert_data(nullptr, 0); return Status::OK(); } diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run63.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run63.hql index aebd7522959..c287595278f 100755 --- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run63.hql +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run63.hql @@ -560,7 
+560,14 @@ CREATE TABLE `all_types_text`( `t_array_string_all_nulls` array<string>, `dt` int) stored as textfile -TBLPROPERTIES("line.delim"="\n", "field.delim"="\1"); +TBLPROPERTIES( + 'field.delim'='\t', + 'line.delim'='\n', + 'collection.delim'=',', + 'mapkey.delim'=':', + 'escape.delim'='|', + 'serialization.null.format'='null' +); CREATE TABLE all_types_par_text( `boolean_col` boolean, @@ -628,4 +635,11 @@ CREATE TABLE all_types_par_text( PARTITIONED BY ( `dt` int) stored as textfile -TBLPROPERTIES("line.delim"="\n", "field.delim"="\1"); +TBLPROPERTIES( + 'field.delim'='\t', + 'line.delim'='\n', + 'collection.delim'=',', + 'mapkey.delim'=':', + 'escape.delim'='|', + 'serialization.null.format'='null' +); diff --git a/regression-test/data/external_table_p0/hive/ddl/test_hive_ddl_text_format.out b/regression-test/data/external_table_p0/hive/ddl/test_hive_ddl_text_format.out new file mode 100644 index 00000000000..faf343ce09b --- /dev/null +++ b/regression-test/data/external_table_p0/hive/ddl/test_hive_ddl_text_format.out @@ -0,0 +1,57 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !default_properties -- +1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"} +2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"} +3 Charlie \N {"keyC":"valueC", "keyD":"valueD"} + +-- !hive_docker_default_properties -- +1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2"} +2 Bob ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"} +3 Charlie \N {"keyC":"valueC","keyD":"valueD"} + +-- !standard_properties -- +1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"} +2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"} +3 Charlie \N {"keyC":"valueC", "keyD":"valueD"} + +-- !hive_docker_standard_properties -- +1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2"} +2 Bob ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"} +3 Charlie \N {"keyC":"valueC","keyD":"valueD"} + +-- !different_properties -- +1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"} +2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"} +3 Charlie \N {"keyC":"valueC", "keyD":"valueD"} + +-- !hive_docker_different_properties -- +1 Alice ["tag1,tag2"] {"key1":"value1,key2:value2\\u00042"} + +-- !default_properties -- +1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"} +2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"} +3 Charlie \N {"keyC":"valueC", "keyD":"valueD"} + +-- !hive_docker_default_properties -- +1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2"} +2 Bob ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"} +3 Charlie \N {"keyC":"valueC","keyD":"valueD"} + +-- !standard_properties -- +1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"} +2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"} +3 Charlie \N {"keyC":"valueC", "keyD":"valueD"} + +-- !hive_docker_standard_properties -- +1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2"} +2 Bob ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"} +3 Charlie \N {"keyC":"valueC","keyD":"valueD"} + +-- !different_properties 
-- +1 Alice ["tag1", "tag2"] {"key1":"value1", "key2":"value2"} +2 Bob ["tagA", "tagB"] {"keyA":"valueA", "keyB":"valueB"} +3 Charlie \N {"keyC":"valueC", "keyD":"valueD"} + +-- !hive_docker_different_properties -- +1 Alice ["tag1","tag2"] {"key1":"value1","key2":"value2\\u00042"} + diff --git a/regression-test/suites/external_table_p0/hive/ddl/test_hive_ddl_text_format.groovy b/regression-test/suites/external_table_p0/hive/ddl/test_hive_ddl_text_format.groovy index aaa5b198e69..730db1247cd 100644 --- a/regression-test/suites/external_table_p0/hive/ddl/test_hive_ddl_text_format.groovy +++ b/regression-test/suites/external_table_p0/hive/ddl/test_hive_ddl_text_format.groovy @@ -17,62 +17,137 @@ suite("test_hive_ddl_text_format", "p0,external,hive,external_docker,external_docker_hive") { String enabled = context.config.otherConfigs.get("enableHiveTest") - if (enabled != null && enabled.equalsIgnoreCase("true")) { - String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") - String hms_port = context.config.otherConfigs.get("hive3HmsPort") - String hdfs_port = context.config.otherConfigs.get("hive3HdfsPort") - String catalog_name = "test_hive_ddl_text_format" - String table_name = "table_with_pars"; + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test.") + return; + } + + for (String hivePrefix : ["hive2", "hive3"]) { + setHivePrefix(hivePrefix) + try{ + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort") + String catalog_name = "test_hive_ddl_text_format" + String table_name = "table_with_pars"; - sql """drop catalog if exists ${catalog_name};""" + sql """drop catalog if exists ${catalog_name};""" - sql """ - create catalog if not exists ${catalog_name} properties ( - 'type'='hms', - 'hive.metastore.uris' = 
'thrift://${externalEnvIp}:${hms_port}', - 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}', - 'use_meta_cache' = 'true' + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}', + 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}', + 'use_meta_cache' = 'true' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + logger.info("switched to catalog " + catalog_name) + sql """use `default`;""" + + sql """ drop table if exists text_table_default_properties """ + sql """ + create table text_table_default_properties ( + id int, + `name` string, + tags array<string>, + attributes map<string, string> + ) PROPERTIES ( + 'file_format'='text' ); - """ - logger.info("catalog " + catalog_name + " created") - sql """switch ${catalog_name};""" - logger.info("switched to catalog " + catalog_name) - sql """use `default`;""" + """ + sql """ + INSERT INTO text_table_default_properties VALUES + (1, 'Alice', array('tag1', 'tag2'), map('key1', 'value1', 'key2', 'value2')), + (2, 'Bob', array('tagA', 'tagB'), map('keyA', 'valueA', 'keyB', 'valueB')), + (3, 'Charlie', NULL, map('keyC', 'valueC', 'keyD', 'valueD')); + """ + order_qt_default_properties """ select * from text_table_default_properties """ + + order_qt_hive_docker_default_properties""" select * from text_table_default_properties """ - sql """ drop table if exists tb_text """ - sql """ - create table tb_text ( - id int, - `name` string - ) PROPERTIES ( - 'compression'='gzip', - 'file_format'='text', - 'field.delim'='\t', - 'line.delim'='\n', - 'collection.delim'=';', - 'mapkey.delim'=':', - 'serialization.null.format'='\\N' - ); - """ + sql """ drop table if exists text_table_standard_properties """ + // Escape characters need to be considered in groovy scripts + sql """ + create table text_table_standard_properties ( + id int, + `name` string, + tags array<string>, + 
attributes map<string, string> + ) PROPERTIES ( + 'compression'='plain', + 'file_format'='text', + 'field.delim'='\\1', + 'line.delim'='\\n', + 'collection.delim'='\\2', + 'mapkey.delim'='\\3', + 'escape.delim'= '\\\\', + 'serialization.null.format'='\\\\N' + ); + """ + sql """ + INSERT INTO text_table_standard_properties VALUES + (1, 'Alice', array('tag1', 'tag2'), map('key1', 'value1', 'key2', 'value2')), + (2, 'Bob', array('tagA', 'tagB'), map('keyA', 'valueA', 'keyB', 'valueB')), + (3, 'Charlie', NULL, map('keyC', 'valueC', 'keyD', 'valueD')); + """ + order_qt_standard_properties """ select * from text_table_standard_properties """ + order_qt_hive_docker_standard_properties """ select * from text_table_standard_properties order by id; """ + + sql """ drop table if exists text_table_different_properties """ + sql """ + create table text_table_different_properties ( + id int, + `name` string, + tags array<string>, + attributes map<string, string> + ) PROPERTIES ( + 'compression'='gzip', + 'file_format'='text', + 'field.delim'='A', + 'line.delim'='\\4', + 'collection.delim'=',', + 'mapkey.delim'=':', + 'escape.delim'='|', + 'serialization.null.format'='null' + ); + """ + sql """ + INSERT INTO text_table_different_properties VALUES + (1, 'Alice', array('tag1', 'tag2'), map('key1', 'value1', 'key2', 'value2')), + (2, 'Bob', array('tagA', 'tagB'), map('keyA', 'valueA', 'keyB', 'valueB')), + (3, 'Charlie', NULL, map('keyC', 'valueC', 'keyD', 'valueD')); + """ + order_qt_different_properties """ select * from text_table_different_properties """ + order_qt_hive_docker_different_properties """ select * from text_table_different_properties order by id; """ - String serde = "'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'" - String input_format = "'org.apache.hadoop.mapred.TextInputFormat'" - String output_format = "'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'" - String doris_fileformat = "'doris.file_format'='text'" - String filed_delim = 
"'field.delim'" - String line_delim = "'line.delim'" - String mapkey_delim = "'mapkey.delim'" + String serde = "'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'" + String input_format = "'org.apache.hadoop.mapred.TextInputFormat'" + String output_format = "'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'" + String doris_fileformat = "'doris.file_format'='text'" + String filed_delim = "'field.delim'" + String line_delim = "'line.delim'" + String mapkey_delim = "'mapkey.delim'" + String collection_delim = "'collection.delim'" + String escape_delim = "'escape.delim'" + String serialization_null_format = "'serialization.null.format'" - def create_tbl_res = sql """ show create table tb_text """ - String res = create_tbl_res.toString() - logger.info("${res}") - assertTrue(res.containsIgnoreCase("${serde}")) - assertTrue(res.containsIgnoreCase("${input_format}")) - assertTrue(res.containsIgnoreCase("${output_format}")) - assertTrue(res.containsIgnoreCase("${doris_fileformat}")) - assertTrue(res.containsIgnoreCase("${filed_delim}")) - assertTrue(res.containsIgnoreCase("${filed_delim}")) - assertTrue(res.containsIgnoreCase("${line_delim}")) - assertTrue(res.containsIgnoreCase("${mapkey_delim}")) + def create_tbl_res = sql """ show create table text_table_standard_properties """ + String res = create_tbl_res.toString() + logger.info("${res}") + assertTrue(res.containsIgnoreCase("${serde}")) + assertTrue(res.containsIgnoreCase("${input_format}")) + assertTrue(res.containsIgnoreCase("${output_format}")) + assertTrue(res.containsIgnoreCase("${doris_fileformat}")) + assertTrue(res.containsIgnoreCase("${filed_delim}")) + assertTrue(res.containsIgnoreCase("${filed_delim}")) + assertTrue(res.containsIgnoreCase("${line_delim}")) + assertTrue(res.containsIgnoreCase("${mapkey_delim}")) + assertTrue(res.containsIgnoreCase("${collection_delim}")) + assertTrue(res.containsIgnoreCase("${escape_delim}")) + assertTrue(res.containsIgnoreCase("${serialization_null_format}")) + 
} finally { + } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org