This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit bbbebdee3012b577c2bec6756666b8b72a2e248f Author: Xiangyu Wang <dut.xian...@gmail.com> AuthorDate: Sun Sep 10 21:55:09 2023 +0800 [Fix](multi-catalog) Do not throw exceptions when file not exists for external hive tables. (#23799) This is a bug similar to #22140. When executing a query with an hms catalog, the query may fail because some hdfs files do not exist. We should distinguish this kind of error from other errors and skip it. ``` errCode = 2, detailMessage = (xxx.xxx.xxx.xxx)[CANCELLED][INTERNAL_ERROR]failed to init reader for file hdfs://xxx/dwd_tmp.db/check_dam_table_relation_record_day_data/part-00000-c4ee3118-ae94-4bf7-8c40-1f12da07a292-c000.snappy.orc, err: [INTERNAL_ERROR]Init OrcReader failed. reason = Failed to read hdfs://xxx/dwd_tmp.db/check_dam_table_relation_record_day_data/part-00000-c4ee3118-ae94-4bf7-8c40-1f12da07a292-c000.snappy.orc: [INTERNAL_ERROR]Read hdfs file failed. (BE: xxx.xxx.xxx.xxx) [...] at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76) at org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:158) at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:1927) at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:738) at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:426) at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:524) at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1025) at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:876) at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:822) at java.security.AccessController.doPrivileged(Native Method) at 
javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730) at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2682) ``` --- be/src/io/fs/file_handle_cache.cpp | 2 ++ be/src/io/fs/hdfs_file_reader.cpp | 26 ++++++++++++++++++++++---- be/src/vec/exec/format/orc/vorc_reader.cpp | 8 +++++++- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/be/src/io/fs/file_handle_cache.cpp b/be/src/io/fs/file_handle_cache.cpp index 7b31ccce6a..815be0f99b 100644 --- a/be/src/io/fs/file_handle_cache.cpp +++ b/be/src/io/fs/file_handle_cache.cpp @@ -42,6 +42,8 @@ Status HdfsFileHandle::init(int64_t file_size) { _hdfs_file = hdfsOpenFile(_fs, _fname.c_str(), O_RDONLY, 0, 0, 0); if (_hdfs_file == nullptr) { std::string _err_msg = hdfs_error(); + // invoker maybe just skip Status.NotFound and continue + // so we need distinguish between it and other kinds of errors if (_err_msg.find("No such file or directory") != std::string::npos) { return Status::NotFound(_err_msg); } diff --git a/be/src/io/fs/hdfs_file_reader.cpp b/be/src/io/fs/hdfs_file_reader.cpp index 6c4f456e37..d344447ae5 100644 --- a/be/src/io/fs/hdfs_file_reader.cpp +++ b/be/src/io/fs/hdfs_file_reader.cpp @@ -138,9 +138,15 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r tSize loop_read = hdfsPread(_handle->fs(), _handle->file(), offset + has_read, to + has_read, bytes_req - has_read); if (loop_read < 0) { + // invoker maybe just skip Status.NotFound and continue + // so we need distinguish between it and other kinds of errors + std::string _err_msg = hdfs_error(); + if (_err_msg.find("No such file or directory") != std::string::npos) { + return Status::NotFound(_err_msg); + } return Status::InternalError( "Read hdfs file failed. 
(BE: {}) namenode:{}, path:{}, err: {}", - BackendOptions::get_localhost(), _name_node, _path.string(), hdfs_error()); + BackendOptions::get_localhost(), _name_node, _path.string(), _err_msg); } if (loop_read == 0) { break; @@ -153,7 +159,7 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r #else // The hedged read only support hdfsPread(). -// TODO: rethink here to see if there are some difference betwenn hdfsPread() and hdfsRead() +// TODO: rethink here to see if there are some difference between hdfsPread() and hdfsRead() Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_read, const IOContext* /*io_ctx*/) { DCHECK(!closed()); @@ -164,8 +170,14 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r int res = hdfsSeek(_handle->fs(), _handle->file(), offset); if (res != 0) { + // invoker maybe just skip Status.NotFound and continue + // so we need distinguish between it and other kinds of errors + std::string _err_msg = hdfs_error(); + if (_err_msg.find("No such file or directory") != std::string::npos) { + return Status::NotFound(_err_msg); + } return Status::InternalError("Seek to offset failed. (BE: {}) offset={}, err: {}", - BackendOptions::get_localhost(), offset, hdfs_error()); + BackendOptions::get_localhost(), offset, _err_msg); } size_t bytes_req = result.size; @@ -181,9 +193,15 @@ Status HdfsFileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_r int64_t loop_read = hdfsRead(_handle->fs(), _handle->file(), to + has_read, bytes_req - has_read); if (loop_read < 0) { + // invoker maybe just skip Status.NotFound and continue + // so we need distinguish between it and other kinds of errors + std::string _err_msg = hdfs_error(); + if (_err_msg.find("No such file or directory") != std::string::npos) { + return Status::NotFound(_err_msg); + } return Status::InternalError( "Read hdfs file failed. 
(BE: {}) namenode:{}, path:{}, err: {}", - BackendOptions::get_localhost(), _name_node, _path.string(), hdfs_error()); + BackendOptions::get_localhost(), _name_node, _path.string(), _err_msg); } if (loop_read == 0) { break; diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 3c81a118f4..37d3e6ac7e 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -237,7 +237,13 @@ Status OrcReader::_create_file_reader() { _reader = orc::createReader( std::unique_ptr<ORCFileInputStream>(_file_input_stream.release()), options); } catch (std::exception& e) { - return Status::InternalError("Init OrcReader failed. reason = {}", e.what()); + // invoker maybe just skip Status.NotFound and continue + // so we need distinguish between it and other kinds of errors + std::string _err_msg = e.what(); + if (_err_msg.find("No such file or directory") != std::string::npos) { + return Status::NotFound(_err_msg); + } + return Status::InternalError("Init OrcReader failed. reason = {}", _err_msg); } _remaining_rows = _reader->getNumberOfRows(); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org