This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch orc
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/orc by this push:
     new a7c0af50 [feature](orc) add getNumberOfRows in RowReader (#122)
a7c0af50 is described below

commit a7c0af50f8ca8ff7cddaf8675473a037f8b13143
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Fri Sep 15 16:01:09 2023 +0800

    [feature](orc) add getNumberOfRows in RowReader (#122)
    
    Add `getNumberOfRows()` so that we can get the number of rows in the given range.
---
 c++/include/orc/Reader.hh | 5 +++++
 c++/src/Reader.cc         | 6 ++++++
 c++/src/Reader.hh         | 4 ++++
 3 files changed, 15 insertions(+)

diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh
index 81e3bf1d..5843d88c 100644
--- a/c++/include/orc/Reader.hh
+++ b/c++/include/orc/Reader.hh
@@ -695,6 +695,11 @@ namespace orc {
      * @param rowNumber the next row the reader should return
      */
     virtual void seekToRow(uint64_t rowNumber) = 0;
+
+    /**
+     * Get number of rows in this range.
+     */
+    virtual uint64_t getNumberOfRows() const = 0;
   };
 }  // namespace orc
 
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index f69f27db..80a5cfd4 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -268,6 +268,7 @@ namespace orc {
     numRowGroupsInStripeRange = 0;
     useTightNumericVector = opts.getUseTightNumericVector();
     uint64_t rowTotal = 0;
+    rowTotalInRange = 0;
 
     firstRowOfStripe.resize(numberOfStripes);
     for (size_t i = 0; i < numberOfStripes; ++i) {
@@ -277,6 +278,7 @@ namespace orc {
       bool isStripeInRange = stripeInfo.offset() >= opts.getOffset() &&
                              stripeInfo.offset() < opts.getOffset() + opts.getLength();
       if (isStripeInRange) {
+        rowTotalInRange += stripeInfo.numberofrows();
         if (i < currentStripe) {
           currentStripe = i;
         }
@@ -502,6 +504,10 @@ namespace orc {
     }
   }
 
+  uint64_t RowReaderImpl::getNumberOfRows() const {
+    return rowTotalInRange;
+  }
+
   void RowReaderImpl::loadStripeIndex() {
     // reset all previous row indexes
     rowIndexes.clear();
diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh
index 2ff3bbe8..c0f891ef 100644
--- a/c++/src/Reader.hh
+++ b/c++/src/Reader.hh
@@ -184,6 +184,8 @@ namespace orc {
     uint64_t rowsInCurrentStripe;
     // number of row groups between first stripe and last stripe
     uint64_t numRowGroupsInStripeRange;
+    // number of rows in range
+    uint64_t rowTotalInRange;
     proto::StripeInformation currentStripeInfo;
     proto::StripeFooter currentStripeFooter;
     std::unique_ptr<ColumnReader> reader;
@@ -291,6 +293,8 @@ namespace orc {
 
     void seekToRow(uint64_t rowNumber) override;
 
+    uint64_t getNumberOfRows() const override;
+
     const FileContents& getFileContents() const;
     bool getThrowOnHive11DecimalOverflow() const;
     bool getIsDecimalAsLong() const;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to