Jibing-Li commented on code in PR #17372:
URL: https://github.com/apache/doris/pull/17372#discussion_r1142837327
########## docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql:
##########
@@ -31,6 +31,289 @@ TBLPROPERTIES (
 msck repair table partition_table;
+
+CREATE TABLE `delta_byte_array`(
+  `c_salutation` string,
+  `c_first_name` string,
+  `c_last_name` string,
+  `c_preferred_cust_flag` string,
+  `c_birth_country` string,
+  `c_login` string,
+  `c_email_address` string,
+  `c_last_review_date` string,
+  `c_customer_id` string
+  )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/different_types_parquet/delta_byte_array'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1661955829');
+
+
+CREATE TABLE `delta_length_byte_array`(
+  `FRUIT` string
+  )
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  '/user/doris/preinstalled_data/different_types_parquet/delta_length_byte_array'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1661955829');
+
+msck repair table delta_length_byte_array;
+
+CREATE EXTERNAL TABLE `delta_binary_packed`(
+  bitwidth0 bigint,
+  bitwidth1 bigint,
+  bitwidth2 bigint,
+  bitwidth3 bigint,
+  bitwidth4 bigint,
+  bitwidth5 bigint,
+  bitwidth6 bigint,
+  bitwidth7 bigint,
+  bitwidth8 bigint,
+  bitwidth9 bigint,
+  bitwidth10 bigint,
+  bitwidth11 bigint,
+  bitwidth12 bigint,
+  bitwidth13 bigint,
+  bitwidth14 bigint,
+  bitwidth15 bigint,
+  bitwidth16 bigint,
+  bitwidth17 bigint,
+  bitwidth18 bigint,
+  bitwidth19 bigint,
+  bitwidth20 bigint,
+  bitwidth21 bigint,
+  bitwidth22 bigint,
+  bitwidth23 bigint,
+  bitwidth24 bigint,
+  bitwidth25 bigint,
+  bitwidth26 bigint,
+  bitwidth27 bigint,
+  bitwidth28 bigint,
+  bitwidth29 bigint,
+  bitwidth30 bigint,
+  bitwidth31 bigint,
+  bitwidth32 bigint,
+  bitwidth33 bigint,
+  bitwidth34 bigint,
+  bitwidth35 bigint,
+  bitwidth36 bigint,
+  bitwidth37 bigint,
+  bitwidth38 bigint,
+  bitwidth39 bigint,
+  bitwidth40 bigint,
+  bitwidth41 bigint,
+  bitwidth42 bigint,
+  bitwidth43 bigint,
+  bitwidth44 bigint,
+  bitwidth45 bigint,
+  bitwidth46 bigint,
+  bitwidth47 bigint,
+  bitwidth48 bigint,
+  bitwidth49 bigint,
+  bitwidth50 bigint,
+  bitwidth51 bigint,
+  bitwidth52 bigint,
+  bitwidth53 bigint,
+  bitwidth54 bigint,
+  bitwidth55 bigint,
+  bitwidth56 bigint,
+  bitwidth57 bigint,
+  bitwidth58 bigint,
+  bitwidth59 bigint,
+  bitwidth60 bigint,
+  bitwidth61 bigint,
+  bitwidth62 bigint,
+  bitwidth63 bigint,
+  bitwidth64 bigint,
+  int_value int
+  )
+LOCATION
+  '/user/doris/preinstalled_data/different_types_parquet/delta_binary_packed'

Review Comment:
   Need to specify the file format, for example, `stored as parquet`. Otherwise, Hive will try to decode it as text format.
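As a concrete illustration of that suggestion (a minimal sketch only, not the exact patch that landed in the PR; the long bitwidth column list is elided here), the `delta_binary_packed` DDL could declare the storage format so the files are read as Parquet rather than as delimited text:

```sql
-- Sketch: same table as in the script above, with the storage format declared.
-- STORED AS PARQUET is shorthand for the explicit ParquetHiveSerDe /
-- MapredParquetInputFormat / MapredParquetOutputFormat clauses used by the
-- other tables in this script.
CREATE EXTERNAL TABLE `delta_binary_packed`(
  bitwidth0 bigint,
  -- ... bitwidth1 through bitwidth64 columns as in the original DDL ...
  int_value int
  )
STORED AS PARQUET
LOCATION
  '/user/doris/preinstalled_data/different_types_parquet/delta_binary_packed';
```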
########## regression-test/suites/external_catalog_p0/hive/test_different_parquet_types.groovy:
##########
@@ -0,0 +1,227 @@
+package suites.external_catalog_p0.hive
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_different_parquet_types", "p0") {
+
+    String hms_port = context.config.otherConfigs.get("hms_port")
+    String hdfs_port = context.config.otherConfigs.get("hdfs_port")
+
+    // problem 01: executing "select * from delta_byte_array limit 10" in Hive returns some valid data, but the same query through Doris returns nothing
+    def q01 = {
+        def res1_1 = sql """
+            select * from delta_byte_array limit 10
+        """
+        logger.info("record res" + res1_1.toString())
+
+        def res1_2 = sql """
+            select count(*) from delta_byte_array
+        """
+        logger.info("record res" + res1_2.toString())
+
+        def res1_3 = sql """
+            select * from hdfs(\"uri\" = \"hdfs://127.0.0.1:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/delta_byte_array/delta_byte_array.parquet\",\"fs.defaultFS\" = \"hdfs://127.0.0.1:${hdfs_port}\",\"format\" = \"parquet\") limit 10
+        """
+        logger.info("record res" + res1_3.toString())
+    }
+
+    // this case is invalid, ignore
+    // def q02 = {
+    //     def res2_1 = sql """
+    //         select * from delta_length_byte_array limit 10;
+    //     """
+    //     logger.info("record res" + res2_1.toString())
+
+    //     def res2_2 = sql """
+    //         select count(*) from delta_length_byte_array;
+    //     """
+    //     logger.info("record res" + res2_2.toString())
+
+    //     def res2_3 = sql """
+    //         select * from hdfs(\"uri\" = \"hdfs://127.0.0.1:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/delta_length_byte_array/delta_length_byte_array.parquet\",\"fs.defaultFS\" = \"hdfs://127.0.0.1:${hdfs_port}\",\"format\" = \"parquet\") limit 10
+    //     """
+    //     logger.info("record res" + res2_3.toString())
+    // }
+
+
+    // problem 2: the Hive query returns null, the Doris catalog query throws an exception, and querying through the hdfs tvf returns null without an exception
+    def q03 = {
+
+        // exception info: [INTERNAL_ERROR]Only support csv data in utf8 codec
+        def res3_1 = sql """
+            select * from delta_binary_packed limit 10;
+        """
+        logger.info("record res" + res3_1.toString())
+
+        def res3_2 = sql """
+            select count(*) from delta_binary_packed;
+        """
+        logger.info("record res" + res3_2.toString())
+
+        // returns nothing, but no exception
+        def res3_3 = sql """
+            select * from hdfs(\"uri\" = \"hdfs://127.0.0.1:${hdfs_port}/user/doris/preinstalled_data/different_types_parquet/delta_binary_packed/delta_binary_packed.parquet\",\"fs.defaultFS\" = \"hdfs://127.0.0.1:${hdfs_port}\",\"format\" = \"parquet\") limit 10
+        """
+        logger.info("record res" + res3_3.toString())
+    }
+
+    // problem 3: the Hive query throws an exception, the Doris query returns nothing

Review Comment:
   Rebase your code with the latest master; this problem has been solved.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.