This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 8e364fb848 [fix](load) skip empty orc file (#10593) 8e364fb848 is described below commit 8e364fb84828d483b51eac8d9aaca393ee85b7f8 Author: Mingyu Chen <morningman....@gmail.com> AuthorDate: Tue Jul 5 22:18:56 2022 +0800 [fix](load) skip empty orc file (#10593) Sometimes the upstream system (e.g., Hive) may create an empty ORC file which only has a header and footer, without a schema. If we then call `_reader->createRowReader()` with selected columns, it will throw ParserError: Invalid column selected xx. So here we first check the file's number of rows and skip these kinds of files. This is only a fix for non-vec load; vec load uses the Arrow scanner to read ORC files, which does not have this problem. --- be/src/exec/orc_scanner.cpp | 9 +++++++++ be/src/vec/exec/vorc_scanner.cpp | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/be/src/exec/orc_scanner.cpp b/be/src/exec/orc_scanner.cpp index 549132cb17..5edf9e75e3 100644 --- a/be/src/exec/orc_scanner.cpp +++ b/be/src/exec/orc_scanner.cpp @@ -391,6 +391,15 @@ Status ORCScanner::open_next_reader() { new ORCFileStream(file_reader.release(), range.path)); _reader = orc::createReader(std::move(inStream), _options); + // Something the upstream system(eg, hive) may create empty orc file + // which only has a header and footer, without schema. + // And if we call `_reader->createRowReader()` with selected columns, + // it will throw ParserError: Invalid column selected xx. + // So here we first check its number of rows and skip these kind of files. 
+ if (_reader->getNumberOfRows() == 0) { + continue; + } + _total_groups = _reader->getNumberOfStripes(); _current_group = 0; _rows_of_group = 0; diff --git a/be/src/vec/exec/vorc_scanner.cpp b/be/src/vec/exec/vorc_scanner.cpp index 7521634183..ca5c7c2aef 100644 --- a/be/src/vec/exec/vorc_scanner.cpp +++ b/be/src/vec/exec/vorc_scanner.cpp @@ -34,4 +34,4 @@ ArrowReaderWrap* VORCScanner::_new_arrow_reader(FileReader* file_reader, int64_t return new ORCReaderWrap(file_reader, batch_size, num_of_columns_from_file); } -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org