kaka11chen commented on code in PR #59307:
URL: https://github.com/apache/doris/pull/59307#discussion_r2707877681
##########
be/src/vec/exec/format/parquet/vparquet_page_reader.cpp:
##########
@@ -77,11 +83,79 @@ Status PageReader<IN_COLLECTION,
OFFSET_INDEX>::parse_page_header() {
return Status::IOError("Should skip or load current page to get next
page");
}
+ _page_statistics.page_read_counter += 1;
+
+ // Parse page header from file; header bytes are saved for possible cache
insertion
const uint8_t* page_header_buf = nullptr;
size_t max_size = _end_offset - _offset;
size_t header_size = std::min(INIT_PAGE_HEADER_SIZE, max_size);
const size_t MAX_PAGE_HEADER_SIZE = config::parquet_header_max_size_mb <<
20;
uint32_t real_header_size = 0;
+
+ // Try a header-only lookup in the page cache. Cached pages store
+ // header + optional v2 levels + uncompressed payload, so we can
+ // parse the page header directly from the cached bytes and avoid
+ // a file read for the header.
+ if (_page_read_ctx.enable_parquet_file_page_cache &&
!config::disable_storage_page_cache &&
+ StoragePageCache::instance() != nullptr) {
+ PageCacheHandle handle;
+ StoragePageCache::CacheKey key(fmt::format("{}::{}", _reader->path(),
_reader->mtime()),
+ _end_offset, _offset);
+ if (StoragePageCache::instance()->lookup(key, &handle,
segment_v2::DATA_PAGE)) {
+ // Parse header directly from cached data
+ _page_cache_handle = std::move(handle);
+ Slice s = _page_cache_handle.data();
+ real_header_size = cast_set<uint32_t>(s.size);
+ SCOPED_RAW_TIMER(&_page_statistics.decode_header_time);
+ auto st = deserialize_thrift_msg(reinterpret_cast<const
uint8_t*>(s.data),
+ &real_header_size, true,
&_cur_page_header);
+ if (!st.ok()) return st;
+ // Increment page cache counters for a true cache hit on
header+payload
+ _page_statistics.page_cache_hit_counter += 1;
+ // Detect whether the cached payload is compressed or decompressed
and record
+ bool is_cache_payload_decompressed = true;
+ if (_cur_page_header.compressed_page_size > 0) {
Review Comment:
ok
##########
be/src/io/file_factory.cpp:
##########
@@ -203,6 +203,21 @@ Result<io::FileReaderSPtr> FileFactory::create_file_reader(
const io::FileSystemProperties& system_properties,
const io::FileDescription& file_description, const
io::FileReaderOptions& reader_options,
RuntimeProfile* profile) {
+ auto reader_res = _create_file_reader_internal(system_properties,
file_description,
+ reader_options, profile);
+ if (!reader_res.has_value()) {
+ return unexpected(std::move(reader_res).error());
+ }
+ auto file_reader = std::move(reader_res).value();
+ LOG_INFO("create file reader for path={}, size={}, mtime={}",
file_description.path,
Review Comment:
ok
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]