github-actions[bot] commented on code in PR #26709: URL: https://github.com/apache/doris/pull/26709#discussion_r1413304786
########## be/src/vec/exec/format/arrow/arrow_pip_input_stream.h: ########## @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "io/file_factory.h" + +namespace doris { + +namespace io { +class FileSystem; +struct IOContext; +} // namespace io + +namespace vectorized { + +class ArrowPipInputStream : public arrow::io::InputStream { + ENABLE_FACTORY_CREATOR(ArrowPipInputStream); + +public: + ArrowPipInputStream(io::FileReaderSPtr file_reader); + ~ArrowPipInputStream() override {} Review Comment: warning: use '= default' to define a trivial destructor [modernize-use-equals-default] ```suggestion ~ArrowPipInputStream() override = default; ``` ########## be/src/vec/exec/format/arrow/arrow_pip_input_stream.cpp: ########## @@ -0,0 +1,101 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow_pip_input_stream.h" + +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/io/buffered.h" +#include "arrow/io/stdio.h" +#include "arrow/ipc/options.h" +#include "arrow/ipc/reader.h" +#include "arrow/record_batch.h" +#include "arrow/result.h" +#include "common/logging.h" +#include "io/fs/stream_load_pipe.h" +#include "olap/wal_manager.h" +#include "runtime/runtime_state.h" + +namespace doris::vectorized { + +ArrowPipInputStream::ArrowPipInputStream(io::FileReaderSPtr file_reader) + : _file_reader(file_reader), _pos(0), _begin(true), _read_buf(new uint8_t[4]) { + set_mode(arrow::io::FileMode::READ); +} + +arrow::Status ArrowPipInputStream::Close() { + return arrow::Status::OK(); +} + +bool ArrowPipInputStream::closed() const { + return false; +} + +arrow::Result<int64_t> ArrowPipInputStream::Tell() const { + return _pos; +} + +Status ArrowPipInputStream::HasNext(bool* get) { + // 1. Arrow's serialization uses a 4-byte data to specify the length of the data that follows, + // so there must be 4-byte data here. + // 2. If it is not determined whether there is a next batch of data (the data has already been transmitted), + // then the `_file_reader->read_at` will return a buff with a read length of 0, + // and the `RecordBatchStreamReader::Open` function will directly report an error when it gets this buff + Slice file_slice(_read_buf, 4); + size_t read_length = 0; + RETURN_IF_ERROR(_file_reader->read_at(0, file_slice, &read_length, NULL)); + if (read_length == 0) { + *get = false; + } else { + *get = true; + } + return Status::OK(); +} + +arrow::Result<int64_t> ArrowPipInputStream::Read(int64_t nbytes, void* out) { + // RecordBatchStreamReader::Open will create a new reader that will stream a batch of arrow data. + // But the first four bytes of this batch of data were taken by the HasNext function, so they need to be copied back here. + uint8_t* out_ptr = (uint8_t*)out; Review Comment: warning: use auto when initializing with a cast to avoid duplicating the type name [modernize-use-auto] ```suggestion auto* out_ptr = (uint8_t*)out; ``` ########## be/src/vec/exec/format/arrow/arrow_pip_input_stream.cpp: ########## @@ -0,0 +1,101 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow_pip_input_stream.h" + +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/io/buffered.h" +#include "arrow/io/stdio.h" +#include "arrow/ipc/options.h" +#include "arrow/ipc/reader.h" +#include "arrow/record_batch.h" +#include "arrow/result.h" +#include "common/logging.h" +#include "io/fs/stream_load_pipe.h" +#include "olap/wal_manager.h" +#include "runtime/runtime_state.h" + +namespace doris::vectorized { + +ArrowPipInputStream::ArrowPipInputStream(io::FileReaderSPtr file_reader) + : _file_reader(file_reader), _pos(0), _begin(true), _read_buf(new uint8_t[4]) { + set_mode(arrow::io::FileMode::READ); +} + +arrow::Status ArrowPipInputStream::Close() { + return arrow::Status::OK(); +} + +bool ArrowPipInputStream::closed() const { Review Comment: warning: method 'closed' can be made static [readability-convert-member-functions-to-static] be/src/vec/exec/format/arrow/arrow_pip_input_stream.h:46: ```diff - bool closed() const override; + static bool closed() override; ``` ```suggestion bool ArrowPipInputStream::closed() { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org