This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 395e882edf1 [fix](Tvf) return empty set when tvf queries an empty file or an error uri (#25280) (#31110) 395e882edf1 is described below commit 395e882edf159c24ff4157e8cc37856d1e9ffcc2 Author: zxealous <zhouchang...@baidu.com> AuthorDate: Mon Feb 19 17:41:31 2024 +0800 [fix](Tvf) return empty set when tvf queries an empty file or an error uri (#25280) (#31110) return errors when tvf queries an empty file or an error uri: 1. get parsed schema failed, empty csv file 2. Can not get first file, please check uri. we just return empty set when tvf queries an empty file or an error uri. ```sql mysql> select * from s3( "uri" = "https://error_uri/exp_1.csv", "s3.access_key"= "xx", "s3.secret_key" = "yy", "format" = "csv") limit 10; Empty set (1.29 sec) ``` Co-authored-by: Tiewei Fang <43782773+bepppo...@users.noreply.github.com> --- .../ExternalFileTableValuedFunction.java | 56 ++++++++++-------- .../tvf/test_hdfs_tvf_error_uri.out | 6 ++ .../data/load_p0/tvf/test_tvf_empty_file.out | 17 ++++++ .../data/load_p0/tvf/test_tvf_error_url.out | 11 ++++ .../tvf/test_hdfs_tvf_error_uri.groovy | 43 ++++++++++++++ .../suites/load_p0/tvf/test_tvf_empty_file.groovy | 69 ++++++++++++++++++++++ .../suites/load_p0/tvf/test_tvf_error_url.groovy | 61 +++++++++++++++++++ 7 files changed, 240 insertions(+), 23 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java index 1e89fd41a5d..eabe82804c3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java @@ -37,7 +37,6 @@ import org.apache.doris.common.UserException; import org.apache.doris.common.util.BrokerUtil; import 
org.apache.doris.common.util.FileFormatConstants; import org.apache.doris.common.util.FileFormatUtils; -import org.apache.doris.common.util.NetUtils; import org.apache.doris.common.util.Util; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.planner.ScanNode; @@ -316,23 +315,27 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio TNetworkAddress address = new TNetworkAddress(be.getHost(), be.getBrpcPort()); try { PFetchTableSchemaRequest request = getFetchTableStructureRequest(); - Future<InternalService.PFetchTableSchemaResult> future = BackendServiceProxy.getInstance() - .fetchTableStructureAsync(address, request); - - InternalService.PFetchTableSchemaResult result = future.get(); - TStatusCode code = TStatusCode.findByValue(result.getStatus().getStatusCode()); - String errMsg; - if (code != TStatusCode.OK) { - if (!result.getStatus().getErrorMsgsList().isEmpty()) { - errMsg = result.getStatus().getErrorMsgsList().get(0); - } else { - errMsg = "fetchTableStructureAsync failed. backend address: " - + NetUtils - .getHostPortInAccessibleFormat(address.getHostname(), address.getPort()); + InternalService.PFetchTableSchemaResult result = null; + + // `request == null` means we don't need to get schemas from BE, + // and we fill a dummy col for this table. + if (request != null) { + Future<InternalService.PFetchTableSchemaResult> future = BackendServiceProxy.getInstance() + .fetchTableStructureAsync(address, request); + + result = future.get(); + TStatusCode code = TStatusCode.findByValue(result.getStatus().getStatusCode()); + String errMsg; + if (code != TStatusCode.OK) { + if (!result.getStatus().getErrorMsgsList().isEmpty()) { + errMsg = result.getStatus().getErrorMsgsList().get(0); + } else { + errMsg = "fetchTableStructureAsync failed. 
backend address: " + + address.getHostname() + ":" + address.getPort(); + } + throw new AnalysisException(errMsg); } - throw new AnalysisException(errMsg); } - fillColumns(result); } catch (RpcException e) { throw new AnalysisException("fetchTableStructureResult rpc exception", e); @@ -393,10 +396,12 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio return Pair.of(type, parsedNodes); } - private void fillColumns(InternalService.PFetchTableSchemaResult result) - throws AnalysisException { - if (result.getColumnNums() == 0) { - throw new AnalysisException("The amount of column is 0"); + private void fillColumns(InternalService.PFetchTableSchemaResult result) { + // `result == null` means we don't need to get schemas from BE, + // and we fill a dummy col for this table. + if (result == null) { + columns.add(new Column("__dummy_col", ScalarType.createStringType(), true)); + return; } // add fetched file columns for (int idx = 0; idx < result.getColumnNums(); ++idx) { @@ -412,7 +417,7 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio } } - private PFetchTableSchemaRequest getFetchTableStructureRequest() throws AnalysisException, TException { + private PFetchTableSchemaRequest getFetchTableStructureRequest() throws TException { // set TFileScanRangeParams TFileScanRangeParams fileScanRangeParams = new TFileScanRangeParams(); fileScanRangeParams.setFormatType(fileFormatType); @@ -429,14 +434,19 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio // get first file, used to parse table schema TBrokerFileStatus firstFile = null; for (TBrokerFileStatus fileStatus : fileStatuses) { - if (fileStatus.isIsDir()) { + if (fileStatus.isIsDir() || fileStatus.size == 0) { continue; } firstFile = fileStatus; break; } + + // `firstFile == null` means: + // 1. No matching file path exists + // 2. 
All matched files have a size of 0 + // For these two situations, we don't need to get schema from BE if (firstFile == null) { - throw new AnalysisException("Can not get first file, please check uri."); + return null; } // set TFileRangeDesc diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf_error_uri.out b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf_error_uri.out new file mode 100644 index 00000000000..115f42f2a0e --- /dev/null +++ b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf_error_uri.out @@ -0,0 +1,6 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select1 -- + +-- !desc1 -- +__dummy_col TEXT Yes false \N NONE + diff --git a/regression-test/data/load_p0/tvf/test_tvf_empty_file.out b/regression-test/data/load_p0/tvf/test_tvf_empty_file.out new file mode 100644 index 00000000000..59822770e2c --- /dev/null +++ b/regression-test/data/load_p0/tvf/test_tvf_empty_file.out @@ -0,0 +1,17 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- + +-- !desc -- +__dummy_col TEXT Yes false \N NONE + +-- !select2 -- +1 doris 18 +2 nereids 20 +3 xxx 22 +4 yyy 21 + +-- !des2 -- +c1 TEXT Yes false \N NONE +c2 TEXT Yes false \N NONE +c3 TEXT Yes false \N NONE + diff --git a/regression-test/data/load_p0/tvf/test_tvf_error_url.out b/regression-test/data/load_p0/tvf/test_tvf_error_url.out new file mode 100644 index 00000000000..468a50ff85d --- /dev/null +++ b/regression-test/data/load_p0/tvf/test_tvf_error_url.out @@ -0,0 +1,11 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select -- + +-- !desc -- +__dummy_col TEXT Yes false \N NONE + +-- !select2 -- + +-- !desc2 -- +__dummy_col TEXT Yes false \N NONE + diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf_error_uri.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf_error_uri.groovy new file mode 100644 index 00000000000..3f663c25e73 --- /dev/null +++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf_error_uri.groovy @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hdfs_tvf_error_uri","external,hive,tvf,external_docker") { + String hdfs_port = context.config.otherConfigs.get("hdfs_port") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + // It's okay to use random `hdfsUser`, but can not be empty. 
+ def hdfsUserName = "doris" + def format = "csv" + def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}" + def uri = "" + + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + // test csv format + uri = "${defaultFS}" + "/user/doris/preinstalled_data/csv_format_test/no_exist_file.csv" + format = "csv" + order_qt_select1 """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}"); """ + + order_qt_desc1 """ desc function HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}"); """ + } +} \ No newline at end of file diff --git a/regression-test/suites/load_p0/tvf/test_tvf_empty_file.groovy b/regression-test/suites/load_p0/tvf/test_tvf_empty_file.groovy new file mode 100644 index 00000000000..9877716ae8c --- /dev/null +++ b/regression-test/suites/load_p0/tvf/test_tvf_empty_file.groovy @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_tvf_empty_file", "p0") { + String ak = getS3AK() + String sk = getS3SK() + String s3_endpoint = getS3Endpoint() + String region = getS3Region() + String bucket = context.config.otherConfigs.get("s3BucketName"); + + String path = "regression/datalake" + + // ${path}/empty_file_test.csv is an empty file + // so it should return empty sets. + order_qt_select """ SELECT * FROM S3 ( + "uri" = "http://${bucket}.${s3_endpoint}/${path}/empty_file_test.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ + + order_qt_desc """ desc function S3 ( + "uri" = "http://${bucket}.${s3_endpoint}/${path}/empty_file_test.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ + + // ${path}/empty_file_test*.csv matches 3 files: + // empty_file_test.csv, empty_file_test_1.csv, empty_file_test_2.csv + // empty_file_test.csv is an empty file, but + // empty_file_test_1.csv and empty_file_test_2.csv have data + // so it should return data of empty_file_test_1.csv and empty_file_test_2.csv + order_qt_select2 """ SELECT * FROM S3 ( + "uri" = "http://${bucket}.${s3_endpoint}/${path}/empty_file_test*.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ) order by c1; + """ + + order_qt_des2 """ desc function S3 ( + "uri" = "http://${bucket}.${s3_endpoint}/${path}/empty_file_test*.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ +} diff --git a/regression-test/suites/load_p0/tvf/test_tvf_error_url.groovy b/regression-test/suites/load_p0/tvf/test_tvf_error_url.groovy new file mode 100644 index 00000000000..d1dcff4d530 --- /dev/null +++ b/regression-test/suites/load_p0/tvf/test_tvf_error_url.groovy @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_tvf_error_url", "p0") { + String ak = getS3AK() + String sk = getS3SK() + String s3_endpoint = getS3Endpoint() + String region = getS3Region() + String bucket = context.config.otherConfigs.get("s3BucketName"); + + String path = "select_tvf/no_exists_file_test" + order_qt_select """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}/${bucket}/${path}/no_exist_file1.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ + + order_qt_desc """ desc function S3 ( + "uri" = "http://${s3_endpoint}/${bucket}/${path}/no_exist_file1.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ + + order_qt_select2 """ SELECT * FROM S3 ( + "uri" = "http://${s3_endpoint}/${bucket}/${path}/*.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ + + order_qt_desc2 """ desc function S3 ( + "uri" = "http://${s3_endpoint}/${bucket}/${path}/*.csv", + "ACCESS_KEY"= "${ak}", + "SECRET_KEY" = "${sk}", + "format" = "csv", + "region" = "${region}" + ); + """ +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, 
e-mail: commits-h...@doris.apache.org