This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 395e882edf1 [fix](Tvf) return empty set when tvf queries an empty file 
or an error uri (#25280) (#31110)
395e882edf1 is described below

commit 395e882edf159c24ff4157e8cc37856d1e9ffcc2
Author: zxealous <zhouchang...@baidu.com>
AuthorDate: Mon Feb 19 17:41:31 2024 +0800

    [fix](Tvf) return empty set when tvf queries an empty file or an error uri 
(#25280) (#31110)
    
    Previously, errors were returned when a tvf queried an empty file or an error uri:
    1. get parsed schema failed, empty csv file
    2. Can not get first file, please check uri.
    
    we just return empty set when tvf queries an empty file or an error uri.
    ```sql
    mysql> select * from s3(
    "uri" = "https://error_uri/exp_1.csv";,
    "s3.access_key"= "xx",
    "s3.secret_key" = "yy",
    "format" = "csv") limit 10;
    
    Empty set (1.29 sec)
    ```
    
    Co-authored-by: Tiewei Fang <43782773+bepppo...@users.noreply.github.com>
---
 .../ExternalFileTableValuedFunction.java           | 56 ++++++++++--------
 .../tvf/test_hdfs_tvf_error_uri.out                |  6 ++
 .../data/load_p0/tvf/test_tvf_empty_file.out       | 17 ++++++
 .../data/load_p0/tvf/test_tvf_error_url.out        | 11 ++++
 .../tvf/test_hdfs_tvf_error_uri.groovy             | 43 ++++++++++++++
 .../suites/load_p0/tvf/test_tvf_empty_file.groovy  | 69 ++++++++++++++++++++++
 .../suites/load_p0/tvf/test_tvf_error_url.groovy   | 61 +++++++++++++++++++
 7 files changed, 240 insertions(+), 23 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
 
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
index 1e89fd41a5d..eabe82804c3 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
@@ -37,7 +37,6 @@ import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.BrokerUtil;
 import org.apache.doris.common.util.FileFormatConstants;
 import org.apache.doris.common.util.FileFormatUtils;
-import org.apache.doris.common.util.NetUtils;
 import org.apache.doris.common.util.Util;
 import org.apache.doris.planner.PlanNodeId;
 import org.apache.doris.planner.ScanNode;
@@ -316,23 +315,27 @@ public abstract class ExternalFileTableValuedFunction 
extends TableValuedFunctio
         TNetworkAddress address = new TNetworkAddress(be.getHost(), 
be.getBrpcPort());
         try {
             PFetchTableSchemaRequest request = getFetchTableStructureRequest();
-            Future<InternalService.PFetchTableSchemaResult> future = 
BackendServiceProxy.getInstance()
-                    .fetchTableStructureAsync(address, request);
-
-            InternalService.PFetchTableSchemaResult result = future.get();
-            TStatusCode code = 
TStatusCode.findByValue(result.getStatus().getStatusCode());
-            String errMsg;
-            if (code != TStatusCode.OK) {
-                if (!result.getStatus().getErrorMsgsList().isEmpty()) {
-                    errMsg = result.getStatus().getErrorMsgsList().get(0);
-                } else {
-                    errMsg = "fetchTableStructureAsync failed. backend 
address: "
-                            + NetUtils
-                            
.getHostPortInAccessibleFormat(address.getHostname(), address.getPort());
+            InternalService.PFetchTableSchemaResult result = null;
+
+            // `request == null` means we don't need to get schemas from BE,
+            // and we fill a dummy col for this table.
+            if (request != null) {
+                Future<InternalService.PFetchTableSchemaResult> future = 
BackendServiceProxy.getInstance()
+                        .fetchTableStructureAsync(address, request);
+
+                result = future.get();
+                TStatusCode code = 
TStatusCode.findByValue(result.getStatus().getStatusCode());
+                String errMsg;
+                if (code != TStatusCode.OK) {
+                    if (!result.getStatus().getErrorMsgsList().isEmpty()) {
+                        errMsg = result.getStatus().getErrorMsgsList().get(0);
+                    } else {
+                        errMsg = "fetchTableStructureAsync failed. backend 
address: "
+                                + address.getHostname() + ":" + 
address.getPort();
+                    }
+                    throw new AnalysisException(errMsg);
                 }
-                throw new AnalysisException(errMsg);
             }
-
             fillColumns(result);
         } catch (RpcException e) {
             throw new AnalysisException("fetchTableStructureResult rpc 
exception", e);
@@ -393,10 +396,12 @@ public abstract class ExternalFileTableValuedFunction 
extends TableValuedFunctio
         return Pair.of(type, parsedNodes);
     }
 
-    private void fillColumns(InternalService.PFetchTableSchemaResult result)
-            throws AnalysisException {
-        if (result.getColumnNums() == 0) {
-            throw new AnalysisException("The amount of column is 0");
+    private void fillColumns(InternalService.PFetchTableSchemaResult result) {
+        // `result == null` means we don't need to get schemas from BE,
+        // and we fill a dummy col for this table.
+        if (result == null) {
+            columns.add(new Column("__dummy_col", 
ScalarType.createStringType(), true));
+            return;
         }
         // add fetched file columns
         for (int idx = 0; idx < result.getColumnNums(); ++idx) {
@@ -412,7 +417,7 @@ public abstract class ExternalFileTableValuedFunction 
extends TableValuedFunctio
         }
     }
 
-    private PFetchTableSchemaRequest getFetchTableStructureRequest() throws 
AnalysisException, TException {
+    private PFetchTableSchemaRequest getFetchTableStructureRequest() throws 
TException {
         // set TFileScanRangeParams
         TFileScanRangeParams fileScanRangeParams = new TFileScanRangeParams();
         fileScanRangeParams.setFormatType(fileFormatType);
@@ -429,14 +434,19 @@ public abstract class ExternalFileTableValuedFunction 
extends TableValuedFunctio
         // get first file, used to parse table schema
         TBrokerFileStatus firstFile = null;
         for (TBrokerFileStatus fileStatus : fileStatuses) {
-            if (fileStatus.isIsDir()) {
+            if (fileStatus.isIsDir() || fileStatus.size == 0) {
                 continue;
             }
             firstFile = fileStatus;
             break;
         }
+
+        // `firstFile == null` means:
+        // 1. No matching file path exists
+        // 2. All matched files have a size of 0
+        // For these two situations, we don't need to get schema from BE
         if (firstFile == null) {
-            throw new AnalysisException("Can not get first file, please check 
uri.");
+            return null;
         }
 
         // set TFileRangeDesc
diff --git 
a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf_error_uri.out 
b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf_error_uri.out
new file mode 100644
index 00000000000..115f42f2a0e
--- /dev/null
+++ b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf_error_uri.out
@@ -0,0 +1,6 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select1 --
+
+-- !desc1 --
+__dummy_col    TEXT    Yes     false   \N      NONE
+
diff --git a/regression-test/data/load_p0/tvf/test_tvf_empty_file.out 
b/regression-test/data/load_p0/tvf/test_tvf_empty_file.out
new file mode 100644
index 00000000000..59822770e2c
--- /dev/null
+++ b/regression-test/data/load_p0/tvf/test_tvf_empty_file.out
@@ -0,0 +1,17 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+
+-- !desc --
+__dummy_col    TEXT    Yes     false   \N      NONE
+
+-- !select2 --
+1      doris   18
+2      nereids 20
+3      xxx     22
+4      yyy     21
+
+-- !des2 --
+c1     TEXT    Yes     false   \N      NONE
+c2     TEXT    Yes     false   \N      NONE
+c3     TEXT    Yes     false   \N      NONE
+
diff --git a/regression-test/data/load_p0/tvf/test_tvf_error_url.out 
b/regression-test/data/load_p0/tvf/test_tvf_error_url.out
new file mode 100644
index 00000000000..468a50ff85d
--- /dev/null
+++ b/regression-test/data/load_p0/tvf/test_tvf_error_url.out
@@ -0,0 +1,11 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select --
+
+-- !desc --
+__dummy_col    TEXT    Yes     false   \N      NONE
+
+-- !select2 --
+
+-- !desc2 --
+__dummy_col    TEXT    Yes     false   \N      NONE
+
diff --git 
a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf_error_uri.groovy 
b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf_error_uri.groovy
new file mode 100644
index 00000000000..3f663c25e73
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf_error_uri.groovy
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hdfs_tvf_error_uri","external,hive,tvf,external_docker") {
+    String hdfs_port = context.config.otherConfigs.get("hdfs_port")
+    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+    // It's okay to use random `hdfsUser`, but can not be empty.
+    def hdfsUserName = "doris"
+    def format = "csv"
+    def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
+    def uri = ""
+
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        // test csv format
+        uri = "${defaultFS}" + 
"/user/doris/preinstalled_data/csv_format_test/no_exist_file.csv"
+        format = "csv"
+        order_qt_select1 """ select * from HDFS(
+                    "uri" = "${uri}",
+                    "hadoop.username" = "${hdfsUserName}",
+                    "format" = "${format}"); """
+
+        order_qt_desc1 """ desc function HDFS(
+                    "uri" = "${uri}",
+                    "hadoop.username" = "${hdfsUserName}",
+                    "format" = "${format}"); """
+    }
+}
\ No newline at end of file
diff --git a/regression-test/suites/load_p0/tvf/test_tvf_empty_file.groovy 
b/regression-test/suites/load_p0/tvf/test_tvf_empty_file.groovy
new file mode 100644
index 00000000000..9877716ae8c
--- /dev/null
+++ b/regression-test/suites/load_p0/tvf/test_tvf_empty_file.groovy
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_tvf_empty_file", "p0") {
+       String ak = getS3AK()
+       String sk = getS3SK()
+       String s3_endpoint = getS3Endpoint()
+       String region = getS3Region()
+       String bucket = context.config.otherConfigs.get("s3BucketName");
+
+       String path = "regression/datalake"
+
+       // ${path}/empty_file_test.csv is an empty file
+       // so it should return empty sets.
+       order_qt_select """ SELECT * FROM S3 (
+                                   "uri" = 
"http://${bucket}.${s3_endpoint}/${path}/empty_file_test.csv";,
+                                   "ACCESS_KEY"= "${ak}",
+                                   "SECRET_KEY" = "${sk}",
+                                   "format" = "csv",
+                                   "region" = "${region}"
+                                   );
+                            """
+
+       order_qt_desc """ desc function S3 (
+                                   "uri" = 
"http://${bucket}.${s3_endpoint}/${path}/empty_file_test.csv";,
+                                   "ACCESS_KEY"= "${ak}",
+                                   "SECRET_KEY" = "${sk}",
+                                   "format" = "csv",
+                                   "region" = "${region}"
+                                   );
+                            """
+
+       // ${path}/empty_file_test*.csv matches 3 files:
+       // empty_file_test.csv, empty_file_test_1.csv, empty_file_test_2.csv
+       // empty_file_test.csv is an empty file, but
+       // empty_file_test_1.csv and empty_file_test_2.csv have data
+       // so it should return data of empty_file_test_1.csv and 
empty_file_test_2.csv
+       order_qt_select2 """ SELECT * FROM S3 (
+                                   "uri" = 
"http://${bucket}.${s3_endpoint}/${path}/empty_file_test*.csv";,
+                                   "ACCESS_KEY"= "${ak}",
+                                   "SECRET_KEY" = "${sk}",
+                                   "format" = "csv",
+                                   "region" = "${region}"
+                                   ) order by c1;
+                            """
+
+       order_qt_des2 """ desc function S3 (
+                                   "uri" = 
"http://${bucket}.${s3_endpoint}/${path}/empty_file_test*.csv";,
+                                   "ACCESS_KEY"= "${ak}",
+                                   "SECRET_KEY" = "${sk}",
+                                   "format" = "csv",
+                                   "region" = "${region}"
+                                   );
+                            """
+}
diff --git a/regression-test/suites/load_p0/tvf/test_tvf_error_url.groovy 
b/regression-test/suites/load_p0/tvf/test_tvf_error_url.groovy
new file mode 100644
index 00000000000..d1dcff4d530
--- /dev/null
+++ b/regression-test/suites/load_p0/tvf/test_tvf_error_url.groovy
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_tvf_error_url", "p0") {
+    String ak = getS3AK()
+    String sk = getS3SK()
+    String s3_endpoint = getS3Endpoint()
+    String region = getS3Region()
+    String bucket = context.config.otherConfigs.get("s3BucketName");
+
+    String path = "select_tvf/no_exists_file_test"
+    order_qt_select """ SELECT * FROM S3 (
+                            "uri" = 
"http://${s3_endpoint}/${bucket}/${path}/no_exist_file1.csv";,
+                            "ACCESS_KEY"= "${ak}",
+                            "SECRET_KEY" = "${sk}",
+                            "format" = "csv",
+                            "region" = "${region}"
+                            );
+                     """
+
+    order_qt_desc """ desc function S3 (
+                            "uri" = 
"http://${s3_endpoint}/${bucket}/${path}/no_exist_file1.csv";,
+                            "ACCESS_KEY"= "${ak}",
+                            "SECRET_KEY" = "${sk}",
+                            "format" = "csv",
+                            "region" = "${region}"
+                            );
+                     """
+
+    order_qt_select2 """ SELECT * FROM S3 (
+                            "uri" = 
"http://${s3_endpoint}/${bucket}/${path}/*.csv";,
+                            "ACCESS_KEY"= "${ak}",
+                            "SECRET_KEY" = "${sk}",
+                            "format" = "csv",
+                            "region" = "${region}"
+                            );
+                     """
+
+    order_qt_desc2 """ desc function S3 (
+                            "uri" = 
"http://${s3_endpoint}/${bucket}/${path}/*.csv";,
+                            "ACCESS_KEY"= "${ak}",
+                            "SECRET_KEY" = "${sk}",
+                            "format" = "csv",
+                            "region" = "${region}"
+                            );
+                     """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to