This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch orc in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/orc by this push: new a7c0af50 [feature](orc) add getNumberOfRows in RowReader (#122) a7c0af50 is described below commit a7c0af50f8ca8ff7cddaf8675473a037f8b13143 Author: Mingyu Chen <morning...@163.com> AuthorDate: Fri Sep 15 16:01:09 2023 +0800 [feature](orc) add getNumberOfRows in RowReader (#122) add `getNumberOfRows()`, so that we can get the number of rows in given range --- c++/include/orc/Reader.hh | 5 +++++ c++/src/Reader.cc | 6 ++++++ c++/src/Reader.hh | 4 ++++ 3 files changed, 15 insertions(+) diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh index 81e3bf1d..5843d88c 100644 --- a/c++/include/orc/Reader.hh +++ b/c++/include/orc/Reader.hh @@ -695,6 +695,11 @@ namespace orc { * @param rowNumber the next row the reader should return */ virtual void seekToRow(uint64_t rowNumber) = 0; + + /** + * Get number of rows in this range. + */ + virtual uint64_t getNumberOfRows() const = 0; }; } // namespace orc diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc index f69f27db..80a5cfd4 100644 --- a/c++/src/Reader.cc +++ b/c++/src/Reader.cc @@ -268,6 +268,7 @@ namespace orc { numRowGroupsInStripeRange = 0; useTightNumericVector = opts.getUseTightNumericVector(); uint64_t rowTotal = 0; + rowTotalInRange = 0; firstRowOfStripe.resize(numberOfStripes); for (size_t i = 0; i < numberOfStripes; ++i) { @@ -277,6 +278,7 @@ namespace orc { bool isStripeInRange = stripeInfo.offset() >= opts.getOffset() && stripeInfo.offset() < opts.getOffset() + opts.getLength(); if (isStripeInRange) { + rowTotalInRange += stripeInfo.numberofrows(); if (i < currentStripe) { currentStripe = i; } @@ -502,6 +504,10 @@ namespace orc { } } + uint64_t RowReaderImpl::getNumberOfRows() const { + return rowTotalInRange; + } + void RowReaderImpl::loadStripeIndex() { // reset all previous row indexes rowIndexes.clear(); diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh index 2ff3bbe8..c0f891ef 100644 --- a/c++/src/Reader.hh +++ 
b/c++/src/Reader.hh @@ -184,6 +184,8 @@ namespace orc { uint64_t rowsInCurrentStripe; // number of row groups between first stripe and last stripe uint64_t numRowGroupsInStripeRange; + // number of rows in range + uint64_t rowTotalInRange; proto::StripeInformation currentStripeInfo; proto::StripeFooter currentStripeFooter; std::unique_ptr<ColumnReader> reader; @@ -291,6 +293,8 @@ namespace orc { void seekToRow(uint64_t rowNumber) override; + uint64_t getNumberOfRows() const override; + const FileContents& getFileContents() const; bool getThrowOnHive11DecimalOverflow() const; bool getIsDecimalAsLong() const; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org