This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch orc
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/orc by this push:
     new 0ec2e079cd6 [Optimize] Optimize stripe footer multiple reads. (#315)
0ec2e079cd6 is described below

commit 0ec2e079cd6ed78a87929d28c33e1b136933c69d
Author: Qi Chen <che...@selectdb.com>
AuthorDate: Wed May 21 15:07:09 2025 +0800

    [Optimize] Optimize stripe footer multiple reads. (#315)
---
 c++/src/Reader.cc       | 9 ++-------
 c++/src/StripeStream.cc | 5 +++--
 c++/src/StripeStream.hh | 8 +++++---
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 4c0144da89e..13e1b82d4e3 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -716,7 +716,7 @@ namespace orc {
     return std::unique_ptr<StripeInformation>(new StripeInformationImpl(
         stripeInfo.offset(), stripeInfo.indexlength(), stripeInfo.datalength(),
         stripeInfo.footerlength(), stripeInfo.numberofrows(), contents->stream.get(),
-        *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics));
+        *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics, nullptr));
   }
 
   FileVersion ReaderImpl::getFormatVersion() const {
@@ -1228,7 +1228,7 @@ namespace orc {
           currentStripeInfo.offset(), currentStripeInfo.indexlength(),
           currentStripeInfo.datalength(), currentStripeInfo.footerlength(),
           currentStripeInfo.numberofrows(), contents->stream.get(), *contents->pool,
-          contents->compression, contents->blockSize, contents->readerMetrics));
+          contents->compression, contents->blockSize, contents->readerMetrics, &currentStripeFooter));
       streams.clear();
       contents->stream->beforeReadStripe(std::move(currentStripeInformation), selectedColumns,
                                          streams);
@@ -1266,11 +1266,6 @@ namespace orc {
 
       if (stringDictFilter != nullptr) {
         std::list<std::string> dictFilterColumnNames;
-        std::unique_ptr<StripeInformation> currentStripeInformation(new StripeInformationImpl(
-            currentStripeInfo.offset(), currentStripeInfo.indexlength(),
-            currentStripeInfo.datalength(), currentStripeInfo.footerlength(),
-            currentStripeInfo.numberofrows(), contents->stream.get(), *contents->pool,
-            contents->compression, contents->blockSize, contents->readerMetrics));
         stringDictFilter->fillDictFilterColumnNames(std::move(currentStripeInformation),
                                                     dictFilterColumnNames);
         std::unordered_map<uint64_t, std::string> columnIdToNameMap;
diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc
index 8efa23efa86..56cde6bfe8d 100644
--- a/c++/src/StripeStream.cc
+++ b/c++/src/StripeStream.cc
@@ -133,13 +133,14 @@ namespace orc {
   }
 
   void StripeInformationImpl::ensureStripeFooterLoaded() const {
-    if (stripeFooter.get() == nullptr) {
+    if (stripeFooter == nullptr && managedStripeFooter.get() == nullptr) {
       std::unique_ptr<SeekableInputStream> pbStream =
           createDecompressor(compression,
                              std::make_unique<SeekableFileInputStream>(
                                  stream, offset + indexLength + dataLength, footerLength, memory),
                              blockSize, memory, metrics);
-      stripeFooter = std::make_unique<proto::StripeFooter>();
+      managedStripeFooter = std::make_unique<proto::StripeFooter>();
+      stripeFooter = managedStripeFooter.get();
       if (!stripeFooter->ParseFromZeroCopyStream(pbStream.get())) {
         throw ParseError("Failed to parse the stripe footer");
       }
diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh
index 57e51ef76f0..296305091a1 100644
--- a/c++/src/StripeStream.hh
+++ b/c++/src/StripeStream.hh
@@ -134,7 +134,8 @@ namespace orc {
     MemoryPool& memory;
     CompressionKind compression;
     uint64_t blockSize;
-    mutable std::unique_ptr<proto::StripeFooter> stripeFooter;
+    mutable proto::StripeFooter* stripeFooter;
+    mutable std::unique_ptr<proto::StripeFooter> managedStripeFooter;
     ReaderMetrics* metrics;
     void ensureStripeFooterLoaded() const;
 
@@ -142,7 +143,7 @@ namespace orc {
     StripeInformationImpl(uint64_t _offset, uint64_t _indexLength, uint64_t _dataLength,
                           uint64_t _footerLength, uint64_t _numRows, InputStream* _stream,
                           MemoryPool& _memory, CompressionKind _compression, uint64_t _blockSize,
-                          ReaderMetrics* _metrics)
+                          ReaderMetrics* _metrics, proto::StripeFooter* _stripeFooter)
         : offset(_offset),
           indexLength(_indexLength),
           dataLength(_dataLength),
@@ -152,7 +153,8 @@ namespace orc {
           memory(_memory),
           compression(_compression),
           blockSize(_blockSize),
-          metrics(_metrics) {
+          metrics(_metrics),
+          stripeFooter(_stripeFooter) {
       // PASS
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to