This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 53ee740b88b [enhance](hive) Add regression-test cases for hive text ddl and hive text insert and fix reading null string bug #42200 (#42272)
53ee740b88b is described below

commit 53ee740b88b374bb98bd3ff23b87d22e2606225f
Author: Rayner Chen <morning...@163.com>
AuthorDate: Tue Oct 22 23:42:49 2024 +0800

    [enhance](hive) Add regression-test cases for hive text ddl and hive text insert and fix reading null string bug #42200 (#42272)
    
    cherry pick from #42200
    
    Co-authored-by: Socrates <suxiaogang...@icloud.com>
---
 be/src/vec/exec/format/csv/csv_reader.cpp          |   2 +-
 .../scripts/create_preinstalled_scripts/run63.hql  |  18 ++-
 .../hive/ddl/test_hive_ddl_text_format.out         |  57 +++++++
 .../hive/ddl/test_hive_ddl_text_format.groovy      | 177 +++++++++++++++------
 4 files changed, 200 insertions(+), 54 deletions(-)

diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp
index 0583b74d735..bf0e543d650 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -622,7 +622,7 @@ template <bool from_json>
 Status CsvReader::deserialize_nullable_string(IColumn& column, Slice& slice) {
     auto& null_column = assert_cast<ColumnNullable&>(column);
     if (!(from_json && _options.converted_from_string && slice.trim_double_quotes())) {
-        if (slice.size == 2 && slice[0] == '\\' && slice[1] == 'N') {
+        if (slice.compare(Slice(_options.null_format, _options.null_len)) == 0) {
             null_column.insert_data(nullptr, 0);
             return Status::OK();
         }
diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run63.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run63.hql
index aebd7522959..c287595278f 100755
--- a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run63.hql
+++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run63.hql
@@ -560,7 +560,14 @@ CREATE TABLE `all_types_text`(
   `t_array_string_all_nulls` array<string>,
   `dt` int)
 stored as textfile
-TBLPROPERTIES("line.delim"="\n", "field.delim"="\1");
+TBLPROPERTIES(
+  'field.delim'='\t',
+  'line.delim'='\n',
+  'collection.delim'=',',
+  'mapkey.delim'=':',
+  'escape.delim'='|',
+  'serialization.null.format'='null'
+);
 
 CREATE TABLE all_types_par_text(
     `boolean_col` boolean,
@@ -628,4 +635,11 @@ CREATE TABLE all_types_par_text(
 PARTITIONED BY (
   `dt` int)
 stored as textfile
-TBLPROPERTIES("line.delim"="\n", "field.delim"="\1");
+TBLPROPERTIES(
+  'field.delim'='\t',
+  'line.delim'='\n',
+  'collection.delim'=',',
+  'mapkey.delim'=':',
+  'escape.delim'='|',
+  'serialization.null.format'='null'
+);
diff --git a/regression-test/data/external_table_p0/hive/ddl/test_hive_ddl_text_format.out b/regression-test/data/external_table_p0/hive/ddl/test_hive_ddl_text_format.out
new file mode 100644
index 00000000000..faf343ce09b
--- /dev/null
+++ b/regression-test/data/external_table_p0/hive/ddl/test_hive_ddl_text_format.out
@@ -0,0 +1,57 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !default_properties --
+1      Alice   ["tag1", "tag2"]        {"key1":"value1", "key2":"value2"}
+2      Bob     ["tagA", "tagB"]        {"keyA":"valueA", "keyB":"valueB"}
+3      Charlie \N      {"keyC":"valueC", "keyD":"valueD"}
+
+-- !hive_docker_default_properties --
+1      Alice   ["tag1","tag2"] {"key1":"value1","key2":"value2"}
+2      Bob     ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"}
+3      Charlie \N      {"keyC":"valueC","keyD":"valueD"}
+
+-- !standard_properties --
+1      Alice   ["tag1", "tag2"]        {"key1":"value1", "key2":"value2"}
+2      Bob     ["tagA", "tagB"]        {"keyA":"valueA", "keyB":"valueB"}
+3      Charlie \N      {"keyC":"valueC", "keyD":"valueD"}
+
+-- !hive_docker_standard_properties --
+1      Alice   ["tag1","tag2"] {"key1":"value1","key2":"value2"}
+2      Bob     ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"}
+3      Charlie \N      {"keyC":"valueC","keyD":"valueD"}
+
+-- !different_properties --
+1      Alice   ["tag1", "tag2"]        {"key1":"value1", "key2":"value2"}
+2      Bob     ["tagA", "tagB"]        {"keyA":"valueA", "keyB":"valueB"}
+3      Charlie \N      {"keyC":"valueC", "keyD":"valueD"}
+
+-- !hive_docker_different_properties --
+1      Alice   ["tag1,tag2"]   {"key1":"value1,key2:value2\\u00042"}
+
+-- !default_properties --
+1      Alice   ["tag1", "tag2"]        {"key1":"value1", "key2":"value2"}
+2      Bob     ["tagA", "tagB"]        {"keyA":"valueA", "keyB":"valueB"}
+3      Charlie \N      {"keyC":"valueC", "keyD":"valueD"}
+
+-- !hive_docker_default_properties --
+1      Alice   ["tag1","tag2"] {"key1":"value1","key2":"value2"}
+2      Bob     ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"}
+3      Charlie \N      {"keyC":"valueC","keyD":"valueD"}
+
+-- !standard_properties --
+1      Alice   ["tag1", "tag2"]        {"key1":"value1", "key2":"value2"}
+2      Bob     ["tagA", "tagB"]        {"keyA":"valueA", "keyB":"valueB"}
+3      Charlie \N      {"keyC":"valueC", "keyD":"valueD"}
+
+-- !hive_docker_standard_properties --
+1      Alice   ["tag1","tag2"] {"key1":"value1","key2":"value2"}
+2      Bob     ["tagA","tagB"] {"keyA":"valueA","keyB":"valueB"}
+3      Charlie \N      {"keyC":"valueC","keyD":"valueD"}
+
+-- !different_properties --
+1      Alice   ["tag1", "tag2"]        {"key1":"value1", "key2":"value2"}
+2      Bob     ["tagA", "tagB"]        {"keyA":"valueA", "keyB":"valueB"}
+3      Charlie \N      {"keyC":"valueC", "keyD":"valueD"}
+
+-- !hive_docker_different_properties --
+1      Alice   ["tag1","tag2"] {"key1":"value1","key2":"value2\\u00042"}
+
diff --git a/regression-test/suites/external_table_p0/hive/ddl/test_hive_ddl_text_format.groovy b/regression-test/suites/external_table_p0/hive/ddl/test_hive_ddl_text_format.groovy
index aaa5b198e69..730db1247cd 100644
--- a/regression-test/suites/external_table_p0/hive/ddl/test_hive_ddl_text_format.groovy
+++ b/regression-test/suites/external_table_p0/hive/ddl/test_hive_ddl_text_format.groovy
@@ -17,62 +17,137 @@
 
 suite("test_hive_ddl_text_format", 
"p0,external,hive,external_docker,external_docker_hive") {
     String enabled = context.config.otherConfigs.get("enableHiveTest")
-    if (enabled != null && enabled.equalsIgnoreCase("true")) {
-        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
-        String hms_port = context.config.otherConfigs.get("hive3HmsPort")
-        String hdfs_port = context.config.otherConfigs.get("hive3HdfsPort")
-        String catalog_name = "test_hive_ddl_text_format"
-        String table_name = "table_with_pars";
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("diable Hive test.")
+        return;
+    }
+
+    for (String hivePrefix : ["hive2", "hive3"]) {
+        setHivePrefix(hivePrefix)
+        try{
+            String externalEnvIp = 
context.config.otherConfigs.get("externalEnvIp")
+            String hms_port = context.config.otherConfigs.get(hivePrefix + 
"HmsPort")
+            String hdfs_port = context.config.otherConfigs.get(hivePrefix + 
"HdfsPort")
+            String catalog_name = "test_hive_ddl_text_format"
+            String table_name = "table_with_pars";
 
-        sql """drop catalog if exists ${catalog_name};"""
+            sql """drop catalog if exists ${catalog_name};"""
 
-        sql """
-            create catalog if not exists ${catalog_name} properties (
-                'type'='hms',
-                'hive.metastore.uris' = 
'thrift://${externalEnvIp}:${hms_port}',
-                'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}',
-                'use_meta_cache' = 'true'
+            sql """
+                create catalog if not exists ${catalog_name} properties (
+                    'type'='hms',
+                    'hive.metastore.uris' = 
'thrift://${externalEnvIp}:${hms_port}',
+                    'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}',
+                    'use_meta_cache' = 'true'
+                );
+            """
+            logger.info("catalog " + catalog_name + " created")
+            sql """switch ${catalog_name};"""
+            logger.info("switched to catalog " + catalog_name)
+            sql """use `default`;"""
+
+            sql """ drop table if exists text_table_default_properties """
+            sql """
+            create table text_table_default_properties (
+                id int,
+                `name` string,
+                tags array<string>,
+                attributes map<string, string>
+            ) PROPERTIES (
+                'file_format'='text'
             );
-        """
-        logger.info("catalog " + catalog_name + " created")
-        sql """switch ${catalog_name};"""
-        logger.info("switched to catalog " + catalog_name)
-        sql """use `default`;"""
+            """
+            sql """
+            INSERT INTO text_table_default_properties VALUES
+                (1, 'Alice', array('tag1', 'tag2'), map('key1', 'value1', 
'key2', 'value2')),
+                (2, 'Bob', array('tagA', 'tagB'), map('keyA', 'valueA', 
'keyB', 'valueB')),
+                (3, 'Charlie', NULL, map('keyC', 'valueC', 'keyD', 'valueD'));
+            """
+            order_qt_default_properties """ select * from 
text_table_default_properties """
+
+            order_qt_hive_docker_default_properties""" select * from 
text_table_default_properties """
 
-        sql """ drop table if exists tb_text """
-        sql """
-        create table tb_text (
-            id int,
-            `name` string
-        ) PROPERTIES (
-            'compression'='gzip',
-            'file_format'='text',
-            'field.delim'='\t',
-            'line.delim'='\n',
-            'collection.delim'=';',
-            'mapkey.delim'=':',
-            'serialization.null.format'='\\N'
-        );
-        """
+            sql """ drop table if exists text_table_standard_properties """
+            // Escape characters need to be considered in groovy scripts
+            sql """
+            create table text_table_standard_properties (
+                id int,
+                `name` string,
+                tags array<string>,
+                attributes map<string, string>
+            ) PROPERTIES (
+                'compression'='plain',
+                'file_format'='text',
+                'field.delim'='\\1',
+                'line.delim'='\\n',
+                'collection.delim'='\\2',
+                'mapkey.delim'='\\3',
+                'escape.delim'= '\\\\',
+                'serialization.null.format'='\\\\N'
+            );
+            """
+            sql """
+            INSERT INTO text_table_standard_properties VALUES
+                (1, 'Alice', array('tag1', 'tag2'), map('key1', 'value1', 
'key2', 'value2')),
+                (2, 'Bob', array('tagA', 'tagB'), map('keyA', 'valueA', 
'keyB', 'valueB')),
+                (3, 'Charlie', NULL, map('keyC', 'valueC', 'keyD', 'valueD'));
+            """
+            order_qt_standard_properties """ select * from 
text_table_standard_properties """
+            order_qt_hive_docker_standard_properties """ select * from 
text_table_standard_properties order by id; """
+
+            sql """ drop table if exists text_table_different_properties """
+            sql """
+            create table text_table_different_properties (
+                id int,
+                `name` string,
+                tags array<string>,
+                attributes map<string, string>
+            ) PROPERTIES (
+                'compression'='gzip',
+                'file_format'='text',
+                'field.delim'='A',
+                'line.delim'='\\4',
+                'collection.delim'=',',
+                'mapkey.delim'=':',
+                'escape.delim'='|',
+                'serialization.null.format'='null'
+            );
+            """
+            sql """
+            INSERT INTO text_table_different_properties VALUES
+                (1, 'Alice', array('tag1', 'tag2'), map('key1', 'value1', 
'key2', 'value2')),
+                (2, 'Bob', array('tagA', 'tagB'), map('keyA', 'valueA', 
'keyB', 'valueB')),
+                (3, 'Charlie', NULL, map('keyC', 'valueC', 'keyD', 'valueD'));
+            """
+            order_qt_different_properties """ select * from 
text_table_different_properties """
+            order_qt_hive_docker_different_properties """ select * from 
text_table_different_properties order by id; """
 
-        String serde = "'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'"
-        String input_format = "'org.apache.hadoop.mapred.TextInputFormat'"
-        String output_format = 
"'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'"
-        String doris_fileformat = "'doris.file_format'='text'"
-        String filed_delim = "'field.delim'"
-        String line_delim = "'line.delim'"
-        String mapkey_delim = "'mapkey.delim'"
+            String serde = 
"'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'"
+            String input_format = "'org.apache.hadoop.mapred.TextInputFormat'"
+            String output_format = 
"'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'"
+            String doris_fileformat = "'doris.file_format'='text'"
+            String filed_delim = "'field.delim'"
+            String line_delim = "'line.delim'"
+            String mapkey_delim = "'mapkey.delim'"
+            String collection_delim = "'collection.delim'"
+            String escape_delim = "'escape.delim'"
+            String serialization_null_format = "'serialization.null.format'"
 
-        def create_tbl_res = sql """ show create table tb_text """
-        String res = create_tbl_res.toString()
-        logger.info("${res}")
-        assertTrue(res.containsIgnoreCase("${serde}"))
-        assertTrue(res.containsIgnoreCase("${input_format}"))
-        assertTrue(res.containsIgnoreCase("${output_format}"))
-        assertTrue(res.containsIgnoreCase("${doris_fileformat}"))
-        assertTrue(res.containsIgnoreCase("${filed_delim}"))
-        assertTrue(res.containsIgnoreCase("${filed_delim}"))
-        assertTrue(res.containsIgnoreCase("${line_delim}"))
-        assertTrue(res.containsIgnoreCase("${mapkey_delim}"))
+            def create_tbl_res = sql """ show create table 
text_table_standard_properties """
+            String res = create_tbl_res.toString()
+            logger.info("${res}")
+            assertTrue(res.containsIgnoreCase("${serde}"))
+            assertTrue(res.containsIgnoreCase("${input_format}"))
+            assertTrue(res.containsIgnoreCase("${output_format}"))
+            assertTrue(res.containsIgnoreCase("${doris_fileformat}"))
+            assertTrue(res.containsIgnoreCase("${filed_delim}"))
+            assertTrue(res.containsIgnoreCase("${filed_delim}"))
+            assertTrue(res.containsIgnoreCase("${line_delim}"))
+            assertTrue(res.containsIgnoreCase("${mapkey_delim}"))
+            assertTrue(res.containsIgnoreCase("${collection_delim}"))
+            assertTrue(res.containsIgnoreCase("${escape_delim}"))
+            assertTrue(res.containsIgnoreCase("${serialization_null_format}"))
+        } finally {
+        }
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to