This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 4bd260a7dc22e24defaeb1b126adcbeddf864ffd
Author: wuwenchi <wuwenchi...@hotmail.com>
AuthorDate: Mon Jul 1 15:23:11 2024 +0800

    [test](migrate) move 2 cases from p2 to p0 (#37004)

    related: #36787

    - test_text_garbled_file
    - test_hive_statistic_timeout
    - test_hive_to_array
    - test_hive_statistic
    - test_hive_default_partition
---
 .../hive_textfile_array_all_types/create_table.hql |  27 ++
 .../hive_textfile_array_all_types/data.tar.gz      | Bin 0 -> 625 bytes
 .../hive_textfile_array_all_types/run.sh           |  13 +
 .../hive_textfile_array_delimiter/create_table.hql |  32 ++
 .../hive_textfile_array_delimiter/data.tar.gz      | Bin 0 -> 690 bytes
 .../hive_textfile_array_delimiter/run.sh           |  13 +
 .../hive_textfile_nestedarray/create_table.hql     |  16 +
 .../hive_textfile_nestedarray/data.tar.gz          | Bin 0 -> 280 bytes
 .../multi_catalog/hive_textfile_nestedarray/run.sh |  13 +
 .../multi_catalog/logs1_parquet/create_table.hql   |  39 +++
 .../data/multi_catalog/logs1_parquet/run.sh        |  22 ++
 .../multi_catalog/one_partition/create_table.hql   |  22 ++
 .../data/multi_catalog/one_partition/data.tar.gz   | Bin 0 -> 296 bytes
 .../data/multi_catalog/one_partition/run.sh        |  13 +
 .../test_csv_format_error/create_table.hql         |  68 ++++
 .../test_csv_format_error/data.tar.gz              | Bin 0 -> 151583 bytes
 .../multi_catalog/test_csv_format_error/run.sh     |  13 +
 .../test_date_string_partition/create_table.hql    |  25 ++
 .../test_date_string_partition/data.tar.gz         | Bin 0 -> 353 bytes
 .../test_date_string_partition/run.sh              |  13 +
 .../multi_catalog/two_partition/create_table.hql   |  25 ++
 .../data/multi_catalog/two_partition/data.tar.gz   | Bin 0 -> 375 bytes
 .../data/multi_catalog/two_partition/run.sh        |  13 +
 .../data/statistics/statistics/create_table.hql    |  33 ++
 .../scripts/data/statistics/statistics/data.tar.gz | Bin 0 -> 3956 bytes
 .../hive/scripts/data/statistics/statistics/run.sh |  13 +
 .../data/tpch_1000_parquet/part/create_table.hql   |  24 ++
 .../scripts/data/tpch_1000_parquet/part/run.sh     |  22 ++
 .../hive/test_hive_default_partition.out           | 174 +++++++++++
 .../hive/test_hive_to_array.out                    |  21 ++
 .../hive/test_text_garbled_file.out                | Bin 296830 -> 593565 bytes
 .../hive/test_hive_default_partition.groovy        |  17 +-
 .../hive/test_hive_statistic.groovy                | 344 +++++++++++++++++++++
 .../hive/test_hive_statistic_timeout.groovy        |  23 +-
 .../hive/test_hive_to_array.groovy                 |  17 +-
 .../hive/test_text_garbled_file.groovy             |  47 +++
 .../hive/test_hive_statistic.groovy                | 338 --------------------
 .../hive/test_text_garbled_file.groovy             |  46 ---
 38 files changed, 1084 insertions(+), 402 deletions(-)

diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/create_table.hql
new file mode 100644
index 00000000000..6b700396838
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/create_table.hql
@@ -0,0 +1,27 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+CREATE TABLE IF NOT EXISTS `hive_textfile_array_all_types`(
+  `col1` array<tinyint>,
+  `col2` array<smallint>,
+  `col3` array<int>,
+  `col4` array<bigint>,
+  `col5` array<boolean>,
+  `col6` array<float>,
+  `col7` array<double>,
+  `col8` array<string>,
+  `col9` array<timestamp>,
+  `col10` array<date>,
+  `col11` array<decimal(10,3)>,
+  `col12` array<char(1)>,
+  `col13` array<varchar(10)>)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  '/user/doris/suites/multi_catalog/hive_textfile_array_all_types';
+
+msck repair table hive_textfile_array_all_types;
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/data.tar.gz b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/data.tar.gz
new file mode 100644
index 00000000000..b9d64ab29bc
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/data.tar.gz differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/run.sh
new file mode 100755
index 00000000000..7c2e7e7aed2
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}"/create_table.hql
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/create_table.hql
new file mode 100644
index 00000000000..7e40a2c6bb7
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/create_table.hql
@@ -0,0 +1,32 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+CREATE TABLE IF NOT EXISTS `hive_textfile_array_delimiter`(
+  `col1` array<tinyint>,
+  `col2` array<smallint>,
+  `col3` array<int>,
+  `col4` array<bigint>,
+  `col5` array<boolean>,
+  `col6` array<float>,
+  `col7` array<double>,
+  `col8` array<string>,
+  `col9` array<timestamp>,
+  `col10` array<date>,
+  `col11` array<decimal(10,3)>,
+  `col12` int,
+  `col13` array<array<array<int>>>)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+  'colelction.delim'=',',
+  'field.delim'='\t',
+  'line.delim'='\n',
+  'serialization.format'='\t')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  '/user/doris/suites/multi_catalog/hive_textfile_array_delimiter';
+
+msck repair table hive_textfile_array_delimiter;
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/data.tar.gz b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/data.tar.gz
new file mode 100644
index 00000000000..f2ec01fa79f
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/data.tar.gz differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/run.sh
new file mode 100755
index 00000000000..7c2e7e7aed2
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_delimiter/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}"/create_table.hql
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/create_table.hql
new file mode 100644
index 00000000000..478a5341537
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/create_table.hql
@@ -0,0 +1,16 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+CREATE TABLE IF NOT EXISTS `hive_textfile_nestedarray`(
+  `col1` int,
+  `col2` array<array<array<int>>>)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  '/user/doris/suites/multi_catalog/hive_textfile_nestedarray';
+
+msck repair table hive_textfile_nestedarray;
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/data.tar.gz b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/data.tar.gz
new file mode 100644
index 00000000000..7c6f8bfef0c
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/data.tar.gz differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/run.sh
new file mode 100755
index 00000000000..7c2e7e7aed2
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_nestedarray/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}"/create_table.hql
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/logs1_parquet/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/logs1_parquet/create_table.hql
new file mode 100644
index 00000000000..d4fb4be9a4e
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/logs1_parquet/create_table.hql
@@ -0,0 +1,39 @@
+
+create database if not exists multi_catalog;
+use multi_catalog;
+
+CREATE TABLE IF NOT EXISTS `logs1_parquet`(
+  `log_time` timestamp,
+  `machine_name` varchar(128),
+  `machine_group` varchar(128),
+  `cpu_idle` float,
+  `cpu_nice` float,
+  `cpu_system` float,
+  `cpu_user` float,
+  `cpu_wio` float,
+  `disk_free` float,
+  `disk_total` float,
+  `part_max_used` float,
+  `load_fifteen` float,
+  `load_five` float,
+  `load_one` float,
+  `mem_buffers` float,
+  `mem_cached` float,
+  `mem_free` float,
+  `mem_shared` float,
+  `swap_free` float,
+  `bytes_in` float,
+  `bytes_out` float)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+WITH SERDEPROPERTIES (
+  'field.delim'=',',
+  'serialization.format'=',')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/suites/multi_catalog/logs1_parquet';
+
+msck repair table logs1_parquet;
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/logs1_parquet/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/logs1_parquet/run.sh
new file mode 100644
index 00000000000..da5415b332a
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/logs1_parquet/run.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+if [[ ! -d "${CUR_DIR}/data" ]]; then
+    echo "${CUR_DIR}/data does not exist"
+    cd "${CUR_DIR}" && rm -f data.tar.gz \
+    && curl -O https://s3BucketName.s3Endpoint/regression/datalake/pipeline_data/multi_catalog/logs1_parquet/data.tar.gz \
+    && tar xzf data.tar.gz
+    cd -
+else
+    echo "${CUR_DIR}/data exist, continue !"
+fi
+
+## mkdir and put data to hdfs
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}"/create_table.hql
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/create_table.hql
new file mode 100644
index 00000000000..8d12858b5f1
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/create_table.hql
@@ -0,0 +1,22 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+
+CREATE TABLE IF NOT EXISTS `one_partition`(
+  `id` int)
+PARTITIONED BY (
+  `part1` int)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+  'field.delim'='|',
+  'serialization.format'='|')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  '/user/doris/suites/multi_catalog/one_partition';
+
+
+msck repair table one_partition;
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/data.tar.gz b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/data.tar.gz
new file mode 100644
index 00000000000..ff9d50b2da6
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/data.tar.gz differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/run.sh
new file mode 100755
index 00000000000..cc8392b9baa
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/one_partition/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/create_table.hql
new file mode 100644
index 00000000000..a1a31120325
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/create_table.hql
@@ -0,0 +1,68 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+CREATE TABLE IF NOT EXISTS `test_csv_format_error`(
+  `device_id` string COMMENT '设备唯一识别ID ',
+  `user_id` bigint COMMENT '设备唯一识别ID HASH DEVICE_ID ',
+  `user_app_id` int COMMENT '使用样本应用的用户Id ',
+  `standard_app_id` int COMMENT '标准应用ID ',
+  `standard_app_name` string COMMENT '标准应用名称 ',
+  `standard_package_name` string COMMENT '标准应用包名 ',
+  `device_brand_id` int COMMENT '设备品牌ID ',
+  `device_brand_name` string COMMENT '设备品牌名称 ',
+  `device_eqp_id` int COMMENT '设备型号ID ',
+  `device_eqp_name` string COMMENT '设备型号名称 ',
+  `os_version_id` int COMMENT '系统版本ID ',
+  `os_version_name` string COMMENT '系统版本名称 ',
+  `os_type_id` int COMMENT '操作系统类型ID 0 安卓 1 IOS ',
+  `os_type_name` string COMMENT '操作系统类型名称 0 安卓 1 IOS ',
+  `os_name` string COMMENT '操作系统名称 ',
+  `oem_os_version` string COMMENT '厂商封装后的操作系统版本 ',
+  `oem_os_name` string COMMENT '厂商封装后的操作系统名称 ',
+  `app_version` string COMMENT '样本应用版本 ',
+  `app_key` string COMMENT '样本应用key ',
+  `app_channel` string COMMENT '推广渠道 ',
+  `package_name` string COMMENT '宿主APP包名 ',
+  `app_name` string COMMENT '宿主APP名称',
+  `sdk_version` string COMMENT 'SDK版本 ',
+  `api_level` string COMMENT 'API等级 ',
+  `carrier_id` int COMMENT '运营商ID ',
+  `carrier_name` string COMMENT '运营商名称 ',
+  `phone_num` string COMMENT '手机号码 ',
+  `ip` string COMMENT 'IP地址 ',
+  `country_id` int COMMENT '国家id',
+  `country_name` string COMMENT '国家name',
+  `province_id` int COMMENT '省份ID ',
+  `province_name` string COMMENT '省份名称 ',
+  `city_id` int COMMENT '地级市ID ',
+  `city_name` string COMMENT '地级市名称 ',
+  `county_id` int COMMENT '县级市ID ',
+  `county_name` string COMMENT '县级市名称 ',
+  `mac_address` string COMMENT 'MAC地址 ',
+  `network_id` int COMMENT '网络类型ID ',
+  `network_name` string COMMENT '网络类型 ',
+  `org_package_name` string COMMENT '原始应用包名 ',
+  `org_app_name` string COMMENT '原始应用程序名 ',
+  `org_app_version` string COMMENT '原始应用版本 ',
+  `app_flag` int COMMENT '安装、更新、还是卸载',
+  `action_time` string COMMENT '行为发生的时间',
+  `day_realy` string COMMENT '行为发生的日期',
+  `memo` map<string,string> COMMENT '备注')
+COMMENT 'ods-App_Installed'
+PARTITIONED BY (
+  `day` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+  'colelction.delim'=',',
+  'field.delim'='\t',
+  'mapkey.delim'=':',
+  'serialization.format'='\t')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  '/user/doris/suites/multi_catalog/test_csv_format_error';
+
+msck repair table test_csv_format_error;
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/data.tar.gz b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/data.tar.gz
new file mode 100644
index 00000000000..c0c9d63e2b8
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/data.tar.gz differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/run.sh
new file mode 100755
index 00000000000..cc8392b9baa
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_csv_format_error/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/create_table.hql
new file mode 100644
index 00000000000..85db9993656
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/create_table.hql
@@ -0,0 +1,25 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+
+
+CREATE TABLE IF NOT EXISTS `test_date_string_partition`(
+  `k1` int)
+PARTITIONED BY (
+  `day1` string,
+  `day2` date)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+  'field.delim'=',',
+  'serialization.format'=',')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  '/user/doris/suites/multi_catalog/test_date_string_partition';
+
+
+msck repair table test_date_string_partition;
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/data.tar.gz b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/data.tar.gz
new file mode 100644
index 00000000000..70f7491953a
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/data.tar.gz differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/run.sh
new file mode 100755
index 00000000000..cc8392b9baa
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/test_date_string_partition/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/create_table.hql
new file mode 100644
index 00000000000..7cdc6015674
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/create_table.hql
@@ -0,0 +1,25 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+
+CREATE TABLE IF NOT EXISTS `two_partition`(
+  `id` int)
+PARTITIONED BY (
+  `part1` int,
+  `part2` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+WITH SERDEPROPERTIES (
+  'field.delim'='|',
+  'serialization.format'='|')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  '/user/doris/suites/multi_catalog/two_partition';
+
+
+
+msck repair table two_partition;
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/data.tar.gz b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/data.tar.gz
new file mode 100644
index 00000000000..cf12e715f3e
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/data.tar.gz differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/run.sh
new file mode 100755
index 00000000000..cc8392b9baa
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/two_partition/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/create_table.hql
new file mode 100644
index 00000000000..e131a4dc78d
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/create_table.hql
@@ -0,0 +1,33 @@
+create database if not exists statistics;
+use statistics;
+
+
+CREATE TABLE IF NOT EXISTS `statistics`(
+  `lo_orderkey` int,
+  `lo_linenumber` int,
+  `lo_custkey` int,
+  `lo_partkey` int,
+  `lo_suppkey` int,
+  `lo_orderdate` int,
+  `lo_orderpriority` string,
+  `lo_shippriority` int,
+  `lo_quantity` int,
+  `lo_extendedprice` int,
+  `lo_ordtotalprice` int,
+  `lo_discount` int,
+  `lo_revenue` int,
+  `lo_supplycost` int,
+  `lo_tax` int,
+  `lo_commitdate` int,
+  `lo_shipmode` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/suites/statistics/statistics';
+
+
+msck repair table statistics;
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/data.tar.gz b/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/data.tar.gz
new file mode 100644
index 00000000000..bf2af70acb9
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/data.tar.gz differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/run.sh
new file mode 100755
index 00000000000..1b9818427b7
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/statistics/statistics/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/statistics/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/statistics/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
+
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/tpch_1000_parquet/part/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/tpch_1000_parquet/part/create_table.hql
new file mode 100644
index 00000000000..bf02000f3f7
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/tpch_1000_parquet/part/create_table.hql
@@ -0,0 +1,24 @@
+create database if not exists tpch_1000_parquet;
+use tpch_1000_parquet;
+
+
+CREATE TABLE IF NOT EXISTS `part`(
+  `p_partkey` int,
+  `p_name` varchar(55),
+  `p_mfgr` char(25),
+  `p_brand` char(10),
+  `p_type` varchar(25),
+  `p_size` int,
+  `p_container` char(10),
+  `p_retailprice` decimal(15,2),
+  `p_comment` varchar(23))
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/suites/tpch_1000_parquet/part';
+
+msck repair table part;
\ No newline at end of file
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/tpch_1000_parquet/part/run.sh b/docker/thirdparties/docker-compose/hive/scripts/data/tpch_1000_parquet/part/run.sh
new file mode 100644
index 00000000000..8d77dd927bf
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/tpch_1000_parquet/part/run.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+set -x
+
+# CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+# if [[ ! -d "${CUR_DIR}/data" ]]; then
+#     echo "${CUR_DIR}/data does not exist"
+#     cd "${CUR_DIR}" && rm -f data.tar.gz \
+#     && curl -O https://s3BucketName.s3Endpoint/regression/datalake/pipeline_data/tpch_1000_parquet/part/data.tar.gz \
+#     && tar xzf data.tar.gz
+#     cd -
+# else
+#     echo "${CUR_DIR}/data exist, continue !"
+# fi
+
+# ## mkdir and put data to hdfs
+# hadoop fs -mkdir -p /user/doris/suites/tpch_1000_parquet/
+# hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/tpch_1000_parquet/
+
+# # create table
+# hive -f "${CUR_DIR}"/create_table.hql
+
diff --git a/regression-test/data/external_table_p2/hive/test_hive_default_partition.out b/regression-test/data/external_table_p0/hive/test_hive_default_partition.out
similarity index 51%
rename from regression-test/data/external_table_p2/hive/test_hive_default_partition.out
rename to regression-test/data/external_table_p0/hive/test_hive_default_partition.out
index cace5bba2bd..24227b9af22 100644
--- a/regression-test/data/external_table_p2/hive/test_hive_default_partition.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_default_partition.out
@@ -173,3 +173,177 @@
 -- !string_part_prune5 --
 2	2023-08-16	2023-08-16
 
+-- !one_partition1 --
+1	1
+2	1
+3	2
+4	2
+5	\N
+6	\N
+
+-- !one_partition2 --
+5	\N
+6	\N
+
+-- !one_partition3 --
+1
+2
+3
+4
+
+-- !one_partition4 --
+1
+1
+2
+2
+
+-- !one_partition5 --
+4	2
+5	\N
+6	\N
+
+-- !one_partition6 --
+3	2
+4	2
+5	\N
+6	\N
+
+-- !two_partition1 --
+1	\N	one
+2	\N	one
+3	2	\N
+4	2	\N
+5	3	three
+6	3	three
+7	\N	\N
+8	\N	\N
+
+-- !two_partition2 --
+1	\N	one
+2	\N	one
+7	\N	\N
+8	\N	\N
+
+-- !two_partition3 --
+3	2	\N
+4	2	\N
+5	3	three
+6	3	three
+
+-- !two_partition4 --
+3	2	\N
+4	2	\N
+7	\N	\N
+8	\N	\N
+
+-- !two_partition5 --
+1	\N	one
+2	\N	one
+5	3	three
+6	3	three
+
+-- !two_partition6 --
+5	3	three
+6	3	three
+
+-- !two_partition7 --
+1	\N	one
+2	\N	one
+
+-- !two_partition8 --
+3	2	\N
+4	2	\N
+
+-- !two_partition9 --
+7	\N	\N
+8	\N	\N
+
+-- !two_partition10 --
+1	\N	one
+2	\N	one
+3	2	\N
+4	2	\N
+5	3	three
+6	3	three
+
+-- !two_partition11 --
+1	\N	one
+2	\N	one
+5	3	three
+6	3	three
+7	\N	\N
+8	\N	\N
+
+-- !two_partition12 --
+3	2	\N
+4	2	\N
+5	3	three
+6	3	three
+7	\N	\N
+8	\N	\N
+
+-- !two_partition13 --
+1	\N	one
+2	\N	one
+3	2	\N
+4	2	\N
+7	\N	\N
+8	\N	\N
+
+-- !two_partition14 --
+1	\N	one
+2	\N	one
+3	2	\N
+4	2	\N
+5	3	three
+6	3	three
+
+-- !two_partition15 --
+6	3	three
+7	\N	\N
+8	\N	\N
+
+-- !two_partition16 --
+3	2	\N
+4	2	\N
+5	3	three
+6	3	three
+
+-- !two_partition17 --
+1	\N	one
+2	\N	one
+
+-- !two_partition18 --
+5	3	three
+6	3	three
+
+-- !string_part_prune1 --
+3	2023-08-17	2023-08-17
+
+-- !string_part_prune2 --
+
+-- !string_part_prune3 --
+3	2023-08-17	2023-08-17
+
+-- !string_part_prune4 --
+1	2023-08-15	2023-08-15
+1	2023-8-15	2023-08-15
+2	2023-08-16	2023-08-16
+3	2023-08-17	2023-08-17
+
+-- !string_part_prune5 --
+3	2023-08-17	2023-08-17
+
+-- !string_part_prune5 --
+1	2023-08-15	2023-08-15
+2	2023-08-16	2023-08-16
+
+-- !string_part_prune5 --
+2	2023-08-16	2023-08-16
+
+-- !string_part_prune5 --
+2	2023-08-16	2023-08-16
+
+-- !string_part_prune5 --
+2	2023-08-16	2023-08-16
+
diff --git a/regression-test/data/external_table_p2/hive/test_hive_to_array.out b/regression-test/data/external_table_p0/hive/test_hive_to_array.out
similarity index 50%
rename from regression-test/data/external_table_p2/hive/test_hive_to_array.out
rename to regression-test/data/external_table_p0/hive/test_hive_to_array.out
index 0965e8d8be4..5dab8a92760 100644
--- a/regression-test/data/external_table_p2/hive/test_hive_to_array.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_to_array.out
@@ -20,3 +20,24 @@
 [4, 5, 6, 7]	[2100, 3100, 4100, 5100]	[110000, 220000, 330000]	[60000000000000, 60000000000000, 60000000000000]	[1]	[120.301, 450.602, 780.9001]	[100.0000001, 200.0000002, 300.0000003]	["hive", "text", "file", "format"]	["2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000"]	["2021-07-09", "2021-07-09", "2021-07-09"]	[3311111.111, 2211111.111, 3121111.111]	5	[[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]], [[1, 2, 3], [4, [...]
 [40, 50, 60, 70]	[210, 310, 410, 510]	[110000, 210000, 310000]	[400000000000000, 50000000000000, 60000000000000]	[1]	[120.301, 450.602, 780.9001]	[100.0000001, 200.0000002, 300.0000003]	["hello", "world"]	["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"]	["2021-07-06", "2021-07-06", "2021-07-06"]	[3311111.111, 2211111.111, 3121111.111]	4	[[[1]]]
 
+-- !types --
+[1, 2, 3, 4]	[10, 20, 30, 40]	[100, 200, 300]	[100000000000000, 20000000000000, 30000000000000, 40000000000000]	[1, 0, 1]	[1.23, 4.56, 7.89]	[10.1, 20.2, 30.3]	["apple", "banana", "orange"]	["2023-07-04 12:00:00.000000", "2023-07-05 12:00:00.000000", "2023-07-06 12:00:00.000000"]	["2023-07-04", "2023-07-05", "2023-07-06"]	[1111111.111, 2111111.111, 3111111.111]	["a", "b", "c"]	["aa", "bb", "cc"]
+[10, 20, 30]	[100, 200, 300, 400]	[1000, 2000, 3000]	[1000000000000000, 200000000000000, 300000000000000, 400000000000000]	[1, 1, 1, 1]	[12.3, 45.6, 78.9]	[100.1, 200.2, 300.3]	["grapes", "watermelon", "kiwi"]	["2023-07-03 12:00:00.000000", "2023-07-03 12:00:00.000000", "2023-07-03 12:00:00.000000"]	["2021-07-05", "2021-07-05", "2021-07-05"]	[2222222.111, 2222222.112, 2222222.113]	\N	\N
+[20, 30, 40, 50]	[200, 300, 400, 500]	[10000, 20000, 30000]	[100000000000000, 20000000000000, 30000000000000]	[1, 1, 1, 1, 0, 0]	[120.3, 450.6, 780.9]	[100.001, 200.002, 300.003]	["melon", "strawberry", "blueberry"]	["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"]	["2021-07-06", "2021-07-06", "2021-07-06"]	[1111111.111, 2111111.111, 3111111.111]	\N	\N
+[4, 5, 6, 7]	[2100, 3100, 4100, 5100]	[110000, 220000, 330000]	[60000000000000, 60000000000000, 60000000000000]	[1]	[120.301, 450.602, 780.9001]	[100.0000001, 200.0000002, 300.0000003]	["hive", "text", "file", "format"]	["2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000"]	["2021-07-09", "2021-07-09", "2021-07-09"]	[3311111.111, 2211111.111, 3121111.111]	["d", "d", "d", "d"]	["ffffffff"]
+[40, 50, 60, 70]	[210, 310, 410, 510]	[110000, 210000, 310000]	[400000000000000, 50000000000000, 60000000000000]	[1]	[120.301, 450.602, 780.9001]	[100.0000001, 200.0000002, 300.0000003]	["hello", "world"]	["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"]	["2021-07-06", "2021-07-06", "2021-07-06"]	[3311111.111, 2211111.111, 3121111.111]	["1"]	["hello", "world"]
+
+-- !array --
+1	[[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]]]
+2	[[[19, 20, 21], [22, 23, 24], [25, 26, 27]], [[28], [31], [34]], [[28, 29], [31, 32], [34, 35]]]
+3	[[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]
+4	[[[1]]]
+5	[[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]], [[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]]]
+
+-- !delimiter --
+[1, 2, 3, 4]	[10, 20, 30, 40]	[100, 200, 300]	[100000000000000, 20000000000000, 30000000000000, 40000000000000]	[1, 0, 1]	[1.23, 4.56, 7.89]	[10.1, 20.2, 30.3]	["apple", "banana", "orange"]	["2023-07-04 12:00:00.000000", "2023-07-05 12:00:00.000000", "2023-07-06 12:00:00.000000"]	["2023-07-04", "2023-07-05", "2023-07-06"]	[1111111.111, 2111111.111, 3111111.111]	1	[[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]]]
+[10, 20, 30]	[100, 200, 300, 400]	[1000, 2000, 3000]	[1000000000000000, 200000000000000, 300000000000000, 400000000000000]	[1, 1, 1, 1]	[12.3, 45.6, 78.9]	[100.1, 200.2, 300.3]	["grapes", "watermelon", "kiwi"]	["2023-07-03 12:00:00.000000", "2023-07-03 12:00:00.000000", "2023-07-03 12:00:00.000000"]	["2021-07-05", "2021-07-05", "2021-07-05"]	[2222222.111, 2222222.112, 2222222.113]	2	[[[19, 20, 21], [22, 23, 24], [25, 26, 27]], [[28], [31], [34]], [[28, 29], [31, 32], [34, 35]]]
+[20, 30, 40, 50]	[200, 300, 400, 500]	[10000, 20000, 30000]	[100000000000000, 20000000000000, 30000000000000]	[1, 1, 1, 1, 0, 0]	[120.3, 450.6, 780.9]	[100.001, 200.002, 300.003]	["melon", "strawberry", "blueberry"]	["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"]	["2021-07-06", "2021-07-06", "2021-07-06"]	[1111111.111, 2111111.111, 3111111.111]	3	[[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]
+[4, 5, 6, 7]	[2100, 3100, 4100, 5100]	[110000, 220000, 330000]	[60000000000000, 60000000000000, 60000000000000]	[1]	[120.301, 450.602, 780.9001]	[100.0000001, 200.0000002, 300.0000003]	["hive", "text", "file", "format"]	["2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000"]	["2021-07-09", "2021-07-09", "2021-07-09"]	[3311111.111, 2211111.111, 3121111.111]	5	[[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]], [[1, 2, 3], [4, [...]
+[40, 50, 60, 70]	[210, 310, 410, 510]	[110000, 210000, 310000]	[400000000000000, 50000000000000, 60000000000000]	[1]	[120.301, 450.602, 780.9001]	[100.0000001, 200.0000002, 300.0000003]	["hello", "world"]	["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"]	["2021-07-06", "2021-07-06", "2021-07-06"]	[3311111.111, 2211111.111, 3121111.111]	4	[[[1]]]
+
diff --git a/regression-test/data/external_table_p2/hive/test_text_garbled_file.out b/regression-test/data/external_table_p0/hive/test_text_garbled_file.out
similarity index 50%
rename from regression-test/data/external_table_p2/hive/test_text_garbled_file.out
rename to regression-test/data/external_table_p0/hive/test_text_garbled_file.out
index b003cd49e97..0c44dc7de0a 100644
Binary files a/regression-test/data/external_table_p2/hive/test_text_garbled_file.out and b/regression-test/data/external_table_p0/hive/test_text_garbled_file.out differ
diff --git a/regression-test/suites/external_table_p2/hive/test_hive_default_partition.groovy b/regression-test/suites/external_table_p0/hive/test_hive_default_partition.groovy
similarity index 95%
rename from regression-test/suites/external_table_p2/hive/test_hive_default_partition.groovy
rename to regression-test/suites/external_table_p0/hive/test_hive_default_partition.groovy
index 6e7215e31d1..b3760481559 100644
--- a/regression-test/suites/external_table_p2/hive/test_hive_default_partition.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_default_partition.groovy
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-suite("test_hive_default_partition", "p2,external,hive,external_remote,external_remote_hive") { +suite("test_hive_default_partition", "p0,external,hive,external_docker,external_docker_hive") { def one_partition1 = """select * from one_partition order by id;""" def one_partition2 = """select id, part1 from one_partition where part1 is null order by id;""" def one_partition3 = """select id from one_partition where part1 is not null order by id;""" @@ -52,11 +52,16 @@ suite("test_hive_default_partition", "p2,external,hive,external_remote,external_ def string_part_prune8 = """select * from test_date_string_partition where cast(day1 as date) in ("2023-08-16", "2023-08-18");""" def string_part_prune9 = """select * from test_date_string_partition where cast(day1 as date) in (cast("2023-08-16" as date), "2023-08-18");""" - String enabled = context.config.otherConfigs.get("enableExternalHiveTest") - if (enabled != null && enabled.equalsIgnoreCase("true")) { - String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") - String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") - String catalog_name = "hive_default_partition" + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable Hive test.") + return + } + + for (String hivePrefix : ["hive2", "hive3"]) { + String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp") + String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String catalog_name = hivePrefix + "_hive_default_partition" sql """drop catalog if exists ${catalog_name};""" sql """ create catalog if not exists ${catalog_name} properties ( diff --git a/regression-test/suites/external_table_p0/hive/test_hive_statistic.groovy b/regression-test/suites/external_table_p0/hive/test_hive_statistic.groovy new file mode 100644 index 00000000000..5da056ff00e --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_hive_statistic.groovy @@ -0,0 +1,344 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hive_statistic", "p0,external,hive,external_docker,external_docker_hive") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable Hive test.") + return + } + + for (String hivePrefix : ["hive2", "hive3"]) { + String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp") + String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String catalog_name = hivePrefix + "_test_hive_statistic" + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hadoop.username' = 'hadoop', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + + // TODO will be supported in future + // Test analyze table without init. + // sql """analyze table ${catalog_name}.tpch_1000_parquet.region with sync""" + + // logger.info("switched to catalog " + catalog_name) + // sql """use statistics;""" + // sql """analyze table `statistics` with sync""" + // def result = sql """show column stats `statistics` (lo_quantity)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_quantity") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "46.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "1") + // assertEquals(result[0][8], "50") + + // result = sql """show column stats `statistics` (lo_orderkey)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_orderkey") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "26.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "1") + // assertEquals(result[0][8], "98") + + // result = sql """show column stats `statistics` (lo_linenumber)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_linenumber") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "7.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "1") + // assertEquals(result[0][8], "7") + + // result = sql """show column stats `statistics` (lo_custkey)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_custkey") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "26.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "67423") + // assertEquals(result[0][8], "2735521") + + // result = sql """show column stats `statistics` (lo_partkey)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_partkey") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "100.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "2250") + // assertEquals(result[0][8], "989601") + + // result = sql """show column stats `statistics` (lo_suppkey)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_suppkey") + // assertEquals(result[0][2], "100.0") + // 
assertEquals(result[0][3], "100.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "4167") + // assertEquals(result[0][8], "195845") + + // result = sql """show column stats `statistics` (lo_orderdate)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_orderdate") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "26.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "19920221") + // assertEquals(result[0][8], "19980721") + + // result = sql """show column stats `statistics` (lo_orderpriority)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_orderpriority") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "5.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "880.0") + // assertEquals(result[0][6], "8.8") + // assertEquals(result[0][7], "'1-URGENT'") + // assertEquals(result[0][8], "'5-LOW'") + + // result = sql """show column stats `statistics` (lo_shippriority)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_shippriority") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "1.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "0") + // assertEquals(result[0][8], "0") + + // result = sql """show column stats `statistics` (lo_extendedprice)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_extendedprice") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "100.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "104300") + // assertEquals(result[0][8], "9066094") + + // result = sql """show column stats `statistics` (lo_ordtotalprice)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_ordtotalprice") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "26.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "3428256") + // assertEquals(result[0][8], "36771805") + + // result = sql """show column stats `statistics` (lo_discount)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_discount") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "11.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "0") + // assertEquals(result[0][8], "10") + + // result = sql """show column stats `statistics` (lo_revenue)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_revenue") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "100.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "101171") + // assertEquals(result[0][8], "8703450") + + // result = sql """show column stats `statistics` (lo_supplycost)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_supplycost") + // assertEquals(result[0][2], "100.0") + // 
assertEquals(result[0][3], "100.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "58023") + // assertEquals(result[0][8], "121374") + + // result = sql """show column stats `statistics` (lo_tax)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_tax") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "9.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "0") + // assertEquals(result[0][8], "8") + + // result = sql """show column stats `statistics` (lo_commitdate)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_commitdate") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "95.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "400.0") + // assertEquals(result[0][6], "4.0") + // assertEquals(result[0][7], "19920515") + // assertEquals(result[0][8], "19981016") + + // result = sql """show column stats `statistics` (lo_shipmode)""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_shipmode") + // assertEquals(result[0][2], "100.0") + // assertEquals(result[0][3], "7.0") + // assertEquals(result[0][4], "0.0") + // assertEquals(result[0][5], "421.0") + // assertEquals(result[0][6], "4.21") + // assertEquals(result[0][7], "'AIR'") + // assertEquals(result[0][8], "'TRUCK'") + + // sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS ('row_count'='6001215')""" + // result = sql "show column stats `statistics` (lo_shipmode)" + // assertEquals(result.size(), 1) + // assertEquals(result[0][0], "lo_shipmode") + // assertEquals(result[0][2], "6001215.0") + + // sql """drop stats statistics""" + // result = sql """show column stats statistics""" + // assertEquals(result.size(), 0) + + // sql """analyze database `statistics` with sync""" + // result = sql """show table stats statistics""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][2], "100") + + // result = sql """show table cached stats statistics""" + // assertEquals(result.size(), 1) + // assertEquals(result[0][2], "100") + + // sql """drop stats statistics""" + // result = sql """show column cached stats statistics""" + // assertEquals(result.size(), 0) + + sql """use multi_catalog""" + sql """analyze table logs1_parquet (log_time) with sync""" + def ctlId + def dbId + def tblId + result = sql """show catalogs""" + + for (int i = 0; i < result.size(); i++) { + if (result[i][1] == catalog_name) { + ctlId = result[i][0] + } + } + result = sql """show proc '/catalogs/$ctlId'""" + for (int i = 0; i < result.size(); i++) { + if (result[i][1] == 'multi_catalog') { + dbId = result[i][0] + } + } + result = sql """show proc '/catalogs/$ctlId/$dbId'""" + for (int i = 0; i < result.size(); i++) { + if (result[i][1] == 'logs1_parquet') { + tblId = result[i][0] + } + } + + result = sql """select * from internal.__internal_schema.column_statistics where id = '${tblId}--1-log_time'""" + assertEquals(result.size(), 1) + def id = result[0][0] + def catalog_id = result[0][1] + def db_id = result[0][2] + def tbl_id = result[0][3] + def idx_id = result[0][4] + def col_id = result[0][5] + def count = result[0][7] + def ndv = result[0][8] + def null_count = result[0][9] + def data_size_in_bytes = result[0][12] + def update_time = result[0][13] + + sql """insert into 
internal.__internal_schema.column_statistics values ('$id', '$catalog_id', '$db_id', '$tbl_id', '$idx_id', '$col_id', NULL, $count, $ndv, $null_count, '', '', '$data_size_in_bytes', '$update_time')""" + + result = sql """show column stats logs1_parquet (log_time)""" + assertEquals(result.size(), 1) + assertEquals(result[0][7], "N/A") + assertEquals(result[0][8], "N/A") + + sql """use tpch1_parquet;""" + sql """drop stats region""" + sql """alter table region modify column r_comment set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='330.0', 'min_value'='ges. thinly even pinto beans ca', 'max_value'='uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl');""" + sql """alter table region modify column r_name set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 'min_value'='AFRICA', 'max_value'='MIDDLE EAST');""" + sql """alter table region modify column r_regionkey set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='20.0', 'min_value'='0', 'max_value'='4');""" + result = sql """show column stats region(r_regionkey)""" + assertEquals(result.size(), 1) + assertEquals(result[0][0], "r_regionkey") + assertEquals(result[0][2], "5.0") + assertEquals(result[0][3], "5.0") + assertEquals(result[0][4], "0.0") + assertEquals(result[0][5], "20.0") + assertEquals(result[0][6], "4.0") + assertEquals(result[0][7], "0") + assertEquals(result[0][8], "4") + + result = sql """show column stats region(r_comment)""" + assertEquals(result.size(), 1) + assertEquals(result[0][0], "r_comment") + assertEquals(result[0][2], "5.0") + assertEquals(result[0][3], "5.0") + assertEquals(result[0][4], "0.0") + assertEquals(result[0][5], "330.0") + assertEquals(result[0][6], "66.0") + assertEquals(result[0][7], "\'ges. thinly even pinto beans ca\'") + assertEquals(result[0][8], "\'uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl\'") + + result = sql """show column stats region(r_name)""" + assertEquals(result.size(), 1) + assertEquals(result[0][0], "r_name") + assertEquals(result[0][2], "5.0") + assertEquals(result[0][3], "5.0") + assertEquals(result[0][4], "0.0") + assertEquals(result[0][5], "34.0") + assertEquals(result[0][6], "6.8") + assertEquals(result[0][7], "\'AFRICA\'") + assertEquals(result[0][8], "\'MIDDLE EAST\'") + + sql """drop catalog ${catalog_name}""" + } +} + diff --git a/regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy b/regression-test/suites/external_table_p0/hive/test_hive_statistic_timeout.groovy similarity index 76% rename from regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy rename to regression-test/suites/external_table_p0/hive/test_hive_statistic_timeout.groovy index a52a2304788..d1ae297f3d5 100644 --- a/regression-test/suites/external_table_p2/hive/test_hive_statistic_timeout.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_statistic_timeout.groovy @@ -15,12 +15,21 @@ // specific language governing permissions and limitations // under the License. 
-suite("test_hive_statistic_timeout", "p2,external,hive,external_remote,external_remote_hive, nonConcurrent") { - String enabled = context.config.otherConfigs.get("enableExternalHiveTest") - if (enabled != null && enabled.equalsIgnoreCase("true")) { - String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") - String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") - String catalog_name = "test_hive_statistic_timeout" +suite("test_hive_statistic_timeout", "p0,external,hive,external_docker,external_docker_hive") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable Hive test.") + return + } + + for (String hivePrefix : ["hive2", "hive3"]) { + String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp") + String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String catalog_name = hivePrefix + "_test_hive_statistic_timeout" + + // TODO tpch will be supported in future + return + sql """drop catalog if exists ${catalog_name};""" sql """ create catalog if not exists ${catalog_name} properties ( @@ -33,6 +42,7 @@ suite("test_hive_statistic_timeout", "p2,external,hive,external_remote,external_ sql """use ${catalog_name}.tpch_1000_parquet""" sql """set global analyze_timeout=1""" + try { test { sql """analyze table part (p_partkey, p_container, p_type, p_retailprice) with sync with full;""" @@ -41,6 +51,7 @@ suite("test_hive_statistic_timeout", "p2,external,hive,external_remote,external_ } finally { sql """set global analyze_timeout=43200""" } + sql """drop catalog ${catalog_name}"""; } } diff --git a/regression-test/suites/external_table_p2/hive/test_hive_to_array.groovy b/regression-test/suites/external_table_p0/hive/test_hive_to_array.groovy similarity index 79% rename from regression-test/suites/external_table_p2/hive/test_hive_to_array.groovy rename to regression-test/suites/external_table_p0/hive/test_hive_to_array.groovy index b03a83a158e..058afc51430 100644 --- a/regression-test/suites/external_table_p2/hive/test_hive_to_array.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_to_array.groovy @@ -15,12 +15,17 @@ // specific language governing permissions and limitations // under the License. 
-suite("test_hive_to_array", "p2,external,hive,external_remote,external_remote_hive") { - String enabled = context.config.otherConfigs.get("enableExternalHiveTest") - if (enabled != null && enabled.equalsIgnoreCase("true")) { - String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") - String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") - String catalog_name = "test_hive_to_array" +suite("test_hive_to_array", "p0,external,hive,external_docker,external_docker_hive") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable Hive test.") + return + } + + for (String hivePrefix : ["hive2", "hive3"]) { + String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp") + String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String catalog_name = hivePrefix + "_test_hive_to_array" sql """drop catalog if exists ${catalog_name};""" sql """ create catalog if not exists ${catalog_name} properties ( diff --git a/regression-test/suites/external_table_p0/hive/test_text_garbled_file.groovy b/regression-test/suites/external_table_p0/hive/test_text_garbled_file.groovy new file mode 100644 index 00000000000..27e171bab8c --- /dev/null +++ b/regression-test/suites/external_table_p0/hive/test_text_garbled_file.groovy @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_text_garbled_file", "p0,external,hive,external_docker,external_docker_hive") { + //test hive garbled files , prevent be hanged + + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + + for (String hivePrefix : ["hive2", "hive3"]) { + String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp") + String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String catalog_name = hivePrefix + "_test_text_garbled_file" + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hadoop.username' = 'hadoop', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + logger.info("switched to catalog " + catalog_name) + + + order_qt_garbled_file """ + select * from ${catalog_name}.multi_catalog.test_csv_format_error; + """ + } + } +} + diff --git a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy deleted file mode 100644 index 4fcd70fe732..00000000000 --- a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy +++ /dev/null @@ -1,338 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -suite("test_hive_statistic", "p2,external,hive,external_remote,external_remote_hive") { - String enabled = context.config.otherConfigs.get("enableExternalHiveTest") - if (enabled != null && enabled.equalsIgnoreCase("true")) { - String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") - String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") - String catalog_name = "test_hive_statistic" - sql """drop catalog if exists ${catalog_name};""" - sql """ - create catalog if not exists ${catalog_name} properties ( - 'type'='hms', - 'hadoop.username' = 'hadoop', - 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' - ); - """ - logger.info("catalog " + catalog_name + " created") - - // Test analyze table without init. 
- sql """analyze table ${catalog_name}.tpch_1000_parquet.region with sync""" - - sql """switch ${catalog_name};""" - logger.info("switched to catalog " + catalog_name) - sql """use statistics;""" - sql """analyze table `statistics` with sync""" - def result = sql """show column stats `statistics` (lo_quantity)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_quantity") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "46.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "1") - assertEquals(result[0][8], "50") - - result = sql """show column stats `statistics` (lo_orderkey)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_orderkey") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "26.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "1") - assertEquals(result[0][8], "98") - - result = sql """show column stats `statistics` (lo_linenumber)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_linenumber") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "7.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "1") - assertEquals(result[0][8], "7") - - result = sql """show column stats `statistics` (lo_custkey)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_custkey") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "26.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "67423") - assertEquals(result[0][8], "2735521") - - result = sql """show column stats `statistics` (lo_partkey)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_partkey") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "100.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "2250") - assertEquals(result[0][8], "989601") - - result = sql """show column stats `statistics` (lo_suppkey)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_suppkey") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "100.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "4167") - assertEquals(result[0][8], "195845") - - result = sql """show column stats `statistics` (lo_orderdate)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_orderdate") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "26.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "19920221") - assertEquals(result[0][8], "19980721") - - result = sql """show column stats `statistics` (lo_orderpriority)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_orderpriority") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "5.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "880.0") - assertEquals(result[0][6], "8.8") - assertEquals(result[0][7], "'1-URGENT'") - assertEquals(result[0][8], "'5-LOW'") - - result = sql """show column stats `statistics` 
(lo_shippriority)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_shippriority") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "1.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "0") - assertEquals(result[0][8], "0") - - result = sql """show column stats `statistics` (lo_extendedprice)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_extendedprice") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "100.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "104300") - assertEquals(result[0][8], "9066094") - - result = sql """show column stats `statistics` (lo_ordtotalprice)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_ordtotalprice") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "26.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "3428256") - assertEquals(result[0][8], "36771805") - - result = sql """show column stats `statistics` (lo_discount)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_discount") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "11.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "0") - assertEquals(result[0][8], "10") - - result = sql """show column stats `statistics` (lo_revenue)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_revenue") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "100.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "101171") - assertEquals(result[0][8], "8703450") - - result = sql """show column stats `statistics` (lo_supplycost)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_supplycost") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "100.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "58023") - assertEquals(result[0][8], "121374") - - result = sql """show column stats `statistics` (lo_tax)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_tax") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "9.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "0") - assertEquals(result[0][8], "8") - - result = sql """show column stats `statistics` (lo_commitdate)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_commitdate") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "95.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "400.0") - assertEquals(result[0][6], "4.0") - assertEquals(result[0][7], "19920515") - assertEquals(result[0][8], "19981016") - - result = sql """show column stats `statistics` (lo_shipmode)""" - assertEquals(result.size(), 1) - assertEquals(result[0][0], "lo_shipmode") - assertEquals(result[0][2], "100.0") - assertEquals(result[0][3], "7.0") - assertEquals(result[0][4], "0.0") - assertEquals(result[0][5], "421.0") - assertEquals(result[0][6], "4.21") - 
-        assertEquals(result[0][7], "'AIR'")
-        assertEquals(result[0][8], "'TRUCK'")
-
-        sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS ('row_count'='6001215')"""
-        result = sql "show column stats `statistics` (lo_shipmode)"
-        assertEquals(result.size(), 1)
-        assertEquals(result[0][0], "lo_shipmode")
-        assertEquals(result[0][2], "6001215.0")
-
-        sql """drop stats statistics"""
-        result = sql """show column stats statistics"""
-        assertEquals(result.size(), 0)
-
-        sql """analyze database `statistics` with sync"""
-        result = sql """show table stats statistics"""
-        assertEquals(result.size(), 1)
-        assertEquals(result[0][2], "100")
-
-        result = sql """show table cached stats statistics"""
-        assertEquals(result.size(), 1)
-        assertEquals(result[0][2], "100")
-
-        sql """drop stats statistics"""
-        result = sql """show column cached stats statistics"""
-        assertEquals(result.size(), 0)
-
-        sql """use multi_catalog"""
-        sql """analyze table logs1_parquet (log_time) with sync"""
-        def ctlId
-        def dbId
-        def tblId
-        result = sql """show catalogs"""
-
-        for (int i = 0; i < result.size(); i++) {
-            if (result[i][1] == catalog_name) {
-                ctlId = result[i][0]
-            }
-        }
-        result = sql """show proc '/catalogs/$ctlId'"""
-        for (int i = 0; i < result.size(); i++) {
-            if (result[i][1] == 'multi_catalog') {
-                dbId = result[i][0]
-            }
-        }
-        result = sql """show proc '/catalogs/$ctlId/$dbId'"""
-        for (int i = 0; i < result.size(); i++) {
-            if (result[i][1] == 'logs1_parquet') {
-                tblId = result[i][0]
-            }
-        }
-
-        result = sql """select * from internal.__internal_schema.column_statistics where id = '${tblId}--1-log_time'"""
-        assertEquals(result.size(), 1)
-        def id = result[0][0]
-        def catalog_id = result[0][1]
-        def db_id = result[0][2]
-        def tbl_id = result[0][3]
-        def idx_id = result[0][4]
-        def col_id = result[0][5]
-        def count = result[0][7]
-        def ndv = result[0][8]
-        def null_count = result[0][9]
-        def data_size_in_bytes = result[0][12]
-        def update_time = result[0][13]
-
-        sql """insert into internal.__internal_schema.column_statistics values ('$id', '$catalog_id', '$db_id', '$tbl_id', '$idx_id', '$col_id', NULL, $count, $ndv, $null_count, '', '', '$data_size_in_bytes', '$update_time')"""
-
-        result = sql """show column stats logs1_parquet (log_time)"""
-        assertEquals(result.size(), 1)
-        assertEquals(result[0][7], "N/A")
-        assertEquals(result[0][8], "N/A")
-
-        sql """use tpch1_parquet;"""
-        sql """drop stats region"""
-        sql """alter table region modify column r_comment set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='330.0', 'min_value'='ges. thinly even pinto beans ca', 'max_value'='uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl');"""
-        sql """alter table region modify column r_name set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 'min_value'='AFRICA', 'max_value'='MIDDLE EAST');"""
-        sql """alter table region modify column r_regionkey set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='20.0', 'min_value'='0', 'max_value'='4');"""
-        result = sql """show column stats region(r_regionkey)"""
-        assertEquals(result.size(), 1)
-        assertEquals(result[0][0], "r_regionkey")
-        assertEquals(result[0][2], "5.0")
-        assertEquals(result[0][3], "5.0")
-        assertEquals(result[0][4], "0.0")
-        assertEquals(result[0][5], "20.0")
-        assertEquals(result[0][6], "4.0")
-        assertEquals(result[0][7], "0")
-        assertEquals(result[0][8], "4")
-
-        result = sql """show column stats region(r_comment)"""
-        assertEquals(result.size(), 1)
-        assertEquals(result[0][0], "r_comment")
-        assertEquals(result[0][2], "5.0")
-        assertEquals(result[0][3], "5.0")
-        assertEquals(result[0][4], "0.0")
-        assertEquals(result[0][5], "330.0")
-        assertEquals(result[0][6], "66.0")
-        assertEquals(result[0][7], "\'ges. thinly even pinto beans ca\'")
-        assertEquals(result[0][8], "\'uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl\'")
-
-        result = sql """show column stats region(r_name)"""
-        assertEquals(result.size(), 1)
-        assertEquals(result[0][0], "r_name")
-        assertEquals(result[0][2], "5.0")
-        assertEquals(result[0][3], "5.0")
-        assertEquals(result[0][4], "0.0")
-        assertEquals(result[0][5], "34.0")
-        assertEquals(result[0][6], "6.8")
-        assertEquals(result[0][7], "\'AFRICA\'")
-        assertEquals(result[0][8], "\'MIDDLE EAST\'")
-
-        sql """drop catalog ${catalog_name}"""
-    }
-}
-
diff --git a/regression-test/suites/external_table_p2/hive/test_text_garbled_file.groovy b/regression-test/suites/external_table_p2/hive/test_text_garbled_file.groovy
deleted file mode 100644
index a3ea6a3bcc2..00000000000
--- a/regression-test/suites/external_table_p2/hive/test_text_garbled_file.groovy
+++ /dev/null
@@ -1,46 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
- -suite("test_text_garbled_file", "p2,external,hive,external_remote,external_remote_hive") { - //test hive garbled files , prevent be hanged - - String enabled = context.config.otherConfigs.get("enableExternalHiveTest") - if (enabled != null && enabled.equalsIgnoreCase("true")) { - String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") - String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") - String catalog_name = "test_text_garbled_file" - sql """drop catalog if exists ${catalog_name};""" - sql """ - create catalog if not exists ${catalog_name} properties ( - 'type'='hms', - 'hadoop.username' = 'hadoop', - 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' - ); - """ - logger.info("catalog " + catalog_name + " created") - sql """switch ${catalog_name};""" - logger.info("switched to catalog " + catalog_name) - - - order_qt_garbled_file """ - select * from ${catalog_name}.multi_catalog.test_csv_format_error; - """ - - - } -} - --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
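For reference, every case moved in this commit follows the same p2-to-p0 conversion: the remote-cluster gate (enableExternalHiveTest with extHiveHmsHost/extHiveHmsPort) becomes the docker gate (enableHiveTest with externalEnvIp and a per-version HmsPort key), and the body runs once per dockerized Hive version. Below is a minimal sketch of that shared skeleton, distilled from the diff above; "my_case" is a placeholder suite name, not a suite in this commit.

// A minimal sketch of the shared p0 docker-based suite skeleton, assuming
// the config keys shown in this diff; "my_case" is a hypothetical name.
suite("my_case", "p0,external,hive,external_docker,external_docker_hive") {
    String enabled = context.config.otherConfigs.get("enableHiveTest")
    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
        logger.info("disable Hive test.")
        return
    }

    // Run the same body against both dockerized Hive versions.
    for (String hivePrefix : ["hive2", "hive3"]) {
        String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
        String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
        String catalog_name = hivePrefix + "_my_case"
        sql """drop catalog if exists ${catalog_name};"""
        sql """
            create catalog if not exists ${catalog_name} properties (
                'type'='hms',
                'hadoop.username' = 'hadoop',
                'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
            );
        """
        sql """switch ${catalog_name};"""
        // ... case-specific queries and assertions go here ...
        sql """drop catalog if exists ${catalog_name};"""
    }
}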