HappenLee commented on code in PR #9856:
URL: https://github.com/apache/incubator-doris/pull/9856#discussion_r889882792
##########
be/src/vec/utils/arrow_column_to_doris_column.cpp:
##########
@@ -236,19 +237,51 @@ static Status convert_column_with_decimal_data(const
arrow::Array* array, size_t
return Status::OK();
}
+static Status convert_offset_from_list_column(const arrow::Array* array,
size_t array_idx,
+ MutableColumnPtr& data_column,
size_t num_elements,
+ size_t* start_idx_for_data,
size_t* num_for_data) {
+ auto& offsets_data = static_cast<ColumnArray&>(*data_column).get_offsets();
+ auto concrete_array = down_cast<const arrow::ListArray*>(array);
+ auto arrow_offsets_array = concrete_array->offsets();
+ auto arrow_offsets =
down_cast<arrow::Int32Array*>(arrow_offsets_array.get());
+ for (int64_t i = array_idx + 1; i < array_idx + num_elements + 1; ++i) {
+ offsets_data.emplace_back(arrow_offsets->Value(i));
+ }
+ *start_idx_for_data = arrow_offsets->Value(array_idx);
+ *num_for_data = offsets_data.back() - *start_idx_for_data;
+
+ return Status::OK();
+}
+
+static Status convert_column_with_list_data(const arrow::Array* array, size_t
array_idx,
+ MutableColumnPtr& data_column,
size_t num_elements,
+ const std::string& timezone) {
+ size_t start_idx_of_data = 0;
+ size_t num_of_data = 0;
+ // get start idx and num of values from arrow offsets
+ RETURN_IF_ERROR(convert_offset_from_list_column(array, array_idx,
data_column, num_elements,
+ &start_idx_of_data,
&num_of_data));
+ auto& data_column_ptr =
static_cast<ColumnArray&>(*data_column).get_data_ptr();
+ auto concrete_array = down_cast<const arrow::ListArray*>(array);
+ std::shared_ptr<arrow::Array> arrow_data = concrete_array->values();
+
+ return arrow_column_to_doris_column(arrow_data.get(), start_idx_of_data,
data_column_ptr,
+ num_of_data, timezone);
Review Comment:
Do the offset validation in this function, so that it does not affect other
processing logic that has no need to be concerned with the Arrow offset.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]