wsjz commented on code in PR #11415: URL: https://github.com/apache/doris/pull/11415#discussion_r936178092
########## be/src/vec/exec/format/parquet/vparquet_reader.h: ########## @@ -48,40 +44,46 @@ namespace doris::vectorized { class ParquetReader { public: - ParquetReader(FileReader* file_reader, int64_t batch_size, int32_t num_of_columns_from_file, + ParquetReader(FileReader* file_reader, int32_t num_of_columns_from_file, int64_t range_start_offset, int64_t range_size); + ~ParquetReader(); - virtual Status init_reader(const TupleDescriptor* tuple_desc, - const std::vector<SlotDescriptor*>& tuple_slot_descs, - const std::vector<ExprContext*>& conjunct_ctxs, - const std::string& timezone) = 0; - virtual Status next_batch(bool* eof) = 0; + + Status init_reader(const TupleDescriptor* tuple_desc, + const std::vector<SlotDescriptor*>& tuple_slot_descs, + const std::vector<ExprContext*>& conjunct_ctxs, const std::string& timezone); + + Status read_next_batch(Block* block); + + bool has_next() const { return !_batch_eof; }; + // std::shared_ptr<Statistics>& statistics() { return _statistics; } - void close() {}; - int64_t size(int64_t* size) { return _file_reader->size(); } + void close(); + + int64_t size() const { return _file_reader->size(); } private: - int64_t _get_row_group_start_offset(const tparquet::RowGroup& row_group); + Status _column_indices(const std::vector<SlotDescriptor*>& tuple_slot_descs); + void _init_row_group_reader(); + void _fill_block_data(std::vector<tparquet::ColumnChunk> columns); + bool _has_page_index(std::vector<tparquet::ColumnChunk> columns); + Status _process_page_index(std::vector<tparquet::ColumnChunk> columns); private: FileReader* _file_reader; std::shared_ptr<FileMetaData> _file_metadata; - // const int64_t _batch_size; - // const int32_t _num_of_columns_from_file; + std::shared_ptr<RowGroupReader> _row_group_reader; Review Comment: Leave this as is for now; it will be implemented after this version. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org