This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit bbbebdee3012b577c2bec6756666b8b72a2e248f
Author: Xiangyu Wang <dut.xian...@gmail.com>
AuthorDate: Sun Sep 10 21:55:09 2023 +0800

    [Fix](multi-catalog) Do not throw exceptions when file not exists for 
external hive tables. (#23799)
    
    This is a bug similar to #22140 .
    
    When executing a query with an hms catalog, the query may fail because 
some hdfs files do not exist. We should distinguish this kind of error from 
other errors and skip it.
    
    ```
    errCode = 2, detailMessage = 
(xxx.xxx.xxx.xxx)[CANCELLED][INTERNAL_ERROR]failed to init reader for file 
hdfs://xxx/dwd_tmp.db/check_dam_table_relation_record_day_data/part-00000-c4ee3118-ae94-4bf7-8c40-1f12da07a292-c000.snappy.orc,
 err: [INTERNAL_ERROR]Init OrcReader failed. reason = Failed to read 
hdfs://xxx/dwd_tmp.db/check_dam_table_relation_record_day_data/part-00000-c4ee3118-ae94-4bf7-8c40-1f12da07a292-c000.snappy.orc:
 [INTERNAL_ERROR]Read hdfs file failed. (BE: xxx.xxx.xxx.xxx)  [...]
    at 
org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
    at 
org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:158)
 at 
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1927)
    at 
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:738)
    at 
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:426)
    at 
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
    at 
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:524)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1025) at 
org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:876) at 
org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:822) at 
java.security.AccessController.doPrivileged(Native Method) at 
javax.security.auth.Subject.doAs(Subject.java:422)
    at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2682)
    ```
---
 be/src/io/fs/file_handle_cache.cpp         |  2 ++
 be/src/io/fs/hdfs_file_reader.cpp          | 26 ++++++++++++++++++++++----
 be/src/vec/exec/format/orc/vorc_reader.cpp |  8 +++++++-
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/be/src/io/fs/file_handle_cache.cpp 
b/be/src/io/fs/file_handle_cache.cpp
index 7b31ccce6a..815be0f99b 100644
--- a/be/src/io/fs/file_handle_cache.cpp
+++ b/be/src/io/fs/file_handle_cache.cpp
@@ -42,6 +42,8 @@ Status HdfsFileHandle::init(int64_t file_size) {
     _hdfs_file = hdfsOpenFile(_fs, _fname.c_str(), O_RDONLY, 0, 0, 0);
     if (_hdfs_file == nullptr) {
         std::string _err_msg = hdfs_error();
+        // invoker maybe just skip Status.NotFound and continue
+        // so we need distinguish between it and other kinds of errors
         if (_err_msg.find("No such file or directory") != std::string::npos) {
             return Status::NotFound(_err_msg);
         }
diff --git a/be/src/io/fs/hdfs_file_reader.cpp 
b/be/src/io/fs/hdfs_file_reader.cpp
index 6c4f456e37..d344447ae5 100644
--- a/be/src/io/fs/hdfs_file_reader.cpp
+++ b/be/src/io/fs/hdfs_file_reader.cpp
@@ -138,9 +138,15 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice 
result, size_t* bytes_r
         tSize loop_read = hdfsPread(_handle->fs(), _handle->file(), offset + 
has_read,
                                     to + has_read, bytes_req - has_read);
         if (loop_read < 0) {
+            // invoker maybe just skip Status.NotFound and continue
+            // so we need distinguish between it and other kinds of errors
+            std::string _err_msg = hdfs_error();
+            if (_err_msg.find("No such file or directory") != 
std::string::npos) {
+                return Status::NotFound(_err_msg);
+            }
             return Status::InternalError(
                     "Read hdfs file failed. (BE: {}) namenode:{}, path:{}, 
err: {}",
-                    BackendOptions::get_localhost(), _name_node, 
_path.string(), hdfs_error());
+                    BackendOptions::get_localhost(), _name_node, 
_path.string(), _err_msg);
         }
         if (loop_read == 0) {
             break;
@@ -153,7 +159,7 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice 
result, size_t* bytes_r
 
 #else
 // The hedged read only support hdfsPread().
-// TODO: rethink here to see if there are some difference betwenn hdfsPread() 
and hdfsRead()
+// TODO: rethink here to see if there are some difference between hdfsPread() 
and hdfsRead()
 Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* 
bytes_read,
                                     const IOContext* /*io_ctx*/) {
     DCHECK(!closed());
@@ -164,8 +170,14 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice 
result, size_t* bytes_r
 
     int res = hdfsSeek(_handle->fs(), _handle->file(), offset);
     if (res != 0) {
+        // invoker maybe just skip Status.NotFound and continue
+        // so we need distinguish between it and other kinds of errors
+        std::string _err_msg = hdfs_error();
+        if (_err_msg.find("No such file or directory") != std::string::npos) {
+            return Status::NotFound(_err_msg);
+        }
         return Status::InternalError("Seek to offset failed. (BE: {}) 
offset={}, err: {}",
-                                     BackendOptions::get_localhost(), offset, 
hdfs_error());
+                                     BackendOptions::get_localhost(), offset, 
_err_msg);
     }
 
     size_t bytes_req = result.size;
@@ -181,9 +193,15 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice 
result, size_t* bytes_r
         int64_t loop_read =
                 hdfsRead(_handle->fs(), _handle->file(), to + has_read, 
bytes_req - has_read);
         if (loop_read < 0) {
+            // invoker maybe just skip Status.NotFound and continue
+            // so we need distinguish between it and other kinds of errors
+            std::string _err_msg = hdfs_error();
+            if (_err_msg.find("No such file or directory") != 
std::string::npos) {
+                return Status::NotFound(_err_msg);
+            }
             return Status::InternalError(
                     "Read hdfs file failed. (BE: {}) namenode:{}, path:{}, 
err: {}",
-                    BackendOptions::get_localhost(), _name_node, 
_path.string(), hdfs_error());
+                    BackendOptions::get_localhost(), _name_node, 
_path.string(), _err_msg);
         }
         if (loop_read == 0) {
             break;
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 3c81a118f4..37d3e6ac7e 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -237,7 +237,13 @@ Status OrcReader::_create_file_reader() {
         _reader = orc::createReader(
                 
std::unique_ptr<ORCFileInputStream>(_file_input_stream.release()), options);
     } catch (std::exception& e) {
-        return Status::InternalError("Init OrcReader failed. reason = {}", 
e.what());
+        // invoker maybe just skip Status.NotFound and continue
+        // so we need distinguish between it and other kinds of errors
+        std::string _err_msg = e.what();
+        if (_err_msg.find("No such file or directory") != std::string::npos) {
+            return Status::NotFound(_err_msg);
+        }
+        return Status::InternalError("Init OrcReader failed. reason = {}", 
_err_msg);
     }
     _remaining_rows = _reader->getNumberOfRows();
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to