This is an automated email from the ASF dual-hosted git repository. xuyang pushed a commit to branch struct-map-type-rebase in repository https://gitbox.apache.org/repos/asf/doris.git
commit c02a706c9aad619ed7112f5f5670d1cc057295a6 Author: xy720 <xuyan...@baidu.com> AuthorDate: Mon Feb 6 14:12:13 2023 +0800 rebase master and fix compile error --- be/src/olap/field.h | 106 +----------------------- be/src/olap/rowset/segment_v2/column_reader.cpp | 8 -- be/src/olap/rowset/segment_v2/column_reader.h | 4 - be/src/olap/types.h | 96 +-------------------- be/src/vec/sink/vmysql_result_writer.cpp | 57 ++++++------- 5 files changed, 29 insertions(+), 242 deletions(-) diff --git a/be/src/olap/field.h b/be/src/olap/field.h index dd3c455b90..198b371bf8 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -260,113 +260,10 @@ private: int32_t _unique_id; }; -<<<<<<< HEAD -======= -template <typename LhsCellType, typename RhsCellType> -int Field::index_cmp(const LhsCellType& lhs, const RhsCellType& rhs) const { - bool l_null = lhs.is_null(); - bool r_null = rhs.is_null(); - if (l_null != r_null) { - return l_null ? -1 : 1; - } else if (l_null) { - return 0; - } - - int32_t res = 0; - if (type() == OLAP_FIELD_TYPE_VARCHAR || type() == OLAP_FIELD_TYPE_STRING) { - const Slice* l_slice = reinterpret_cast<const Slice*>(lhs.cell_ptr()); - const Slice* r_slice = reinterpret_cast<const Slice*>(rhs.cell_ptr()); - uint32_t max_bytes = - type() == OLAP_FIELD_TYPE_VARCHAR ? OLAP_VARCHAR_MAX_BYTES : OLAP_STRING_MAX_BYTES; - if (r_slice->size + max_bytes > _index_size || l_slice->size + max_bytes > _index_size) { - // if the actual length of the field is longer than the short key, only the prefix is compared, - // make sure all blocks with the same short key are scanned - // Otherwise, the short key and field can be compared directly - int compare_size = _index_size - max_bytes; - // l_slice size and r_slice size may be less than compare_size - // so calculate the min of the three size as new compare_size - compare_size = std::min(std::min(compare_size, (int)l_slice->size), (int)r_slice->size); - - // This function is used to compare prefix index. - // Only the fixed length of prefix index should be compared. - // If r_slice->size > l_slice->size, ignore the extra parts directly. - res = strncmp(l_slice->data, r_slice->data, compare_size); - if (res == 0 && compare_size != (_index_size - max_bytes)) { - if (l_slice->size < r_slice->size) { - res = -1; - } else if (l_slice->size > r_slice->size) { - res = 1; - } else { - res = 0; - } - } - } else { - res = l_slice->compare(*r_slice); - } - } else { - res = _type_info->cmp(lhs.cell_ptr(), rhs.cell_ptr()); - } - - return res; -} - -template <typename DstCellType, typename SrcCellType> -void Field::to_index(DstCellType* dst, const SrcCellType& src) const { - bool is_null = src.is_null(); - dst->set_is_null(is_null); - if (is_null) { - return; - } - - if (type() == OLAP_FIELD_TYPE_VARCHAR) { - // clear before copy - memset(dst->mutable_cell_ptr(), 0, _index_size); - const Slice* slice = reinterpret_cast<const Slice*>(src.cell_ptr()); - size_t copy_size = slice->size < _index_size - OLAP_VARCHAR_MAX_BYTES - ? slice->size - : _index_size - OLAP_VARCHAR_MAX_BYTES; - *reinterpret_cast<VarcharLengthType*>(dst->mutable_cell_ptr()) = copy_size; - memory_copy((char*)dst->mutable_cell_ptr() + OLAP_VARCHAR_MAX_BYTES, slice->data, - copy_size); - } else if (type() == OLAP_FIELD_TYPE_STRING) { - // clear before copy - memset(dst->mutable_cell_ptr(), 0, _index_size); - const Slice* slice = reinterpret_cast<const Slice*>(src.cell_ptr()); - size_t copy_size = slice->size < _index_size - OLAP_STRING_MAX_BYTES - ? slice->size - : _index_size - OLAP_STRING_MAX_BYTES; - *reinterpret_cast<StringLengthType*>(dst->mutable_cell_ptr()) = copy_size; - memory_copy((char*)dst->mutable_cell_ptr() + OLAP_STRING_MAX_BYTES, slice->data, copy_size); - } else if (type() == OLAP_FIELD_TYPE_CHAR) { - // clear before copy - memset(dst->mutable_cell_ptr(), 0, _index_size); - const Slice* slice = reinterpret_cast<const Slice*>(src.cell_ptr()); - memory_copy(dst->mutable_cell_ptr(), slice->data, _index_size); - } else { - memory_copy(dst->mutable_cell_ptr(), src.cell_ptr(), size()); - } -} - -template <typename CellType> -uint32_t Field::hash_code(const CellType& cell, uint32_t seed) const { - bool is_null = cell.is_null(); - if (is_null) { - return HashUtil::hash(&is_null, sizeof(is_null), seed); - } - return _type_info->hash_code(cell.cell_ptr(), seed); -} - class MapField : public Field { public: explicit MapField(const TabletColumn& column) : Field(column) {} - void consume(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, - ObjectPool* agg_pool) const override { - dst->set_is_null(src_null); - if (src_null) { - return; - } - _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); - } + // make variable_ptr memory allocate to cell_ptr as MapValue char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { return variable_ptr + _length; @@ -397,7 +294,6 @@ public: } }; ->>>>>>> [WIP](struct-type) support struct-type in vectorize engine (#15665) class ArrayField : public Field { public: explicit ArrayField(const TabletColumn& column) : Field(column) {} diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 3bc4895da1..95e83ee049 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -561,10 +561,6 @@ Status MapFileColumnIterator::init(const ColumnIteratorOptions& opts) { return Status::OK(); } -Status MapFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) { - return Status::NotSupported("Not support next_batch"); -} - Status MapFileColumnIterator::seek_to_ordinal(ordinal_t ord) { RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(ord)); RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(ord)); @@ -631,10 +627,6 @@ Status StructFileColumnIterator::init(const ColumnIteratorOptions& opts) { return Status::OK(); } -Status StructFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) { - return Status::NotSupported("not supported"); -} - Status StructFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) { const auto* column_struct = vectorized::check_and_get_column<vectorized::ColumnStruct>( diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 5a0d4e8bdd..b72f22bb4a 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -386,8 +386,6 @@ public: Status init(const ColumnIteratorOptions& opts) override; - Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) override; - Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override; Status read_by_rowids(const rowid_t* rowids, const size_t count, @@ -420,8 +418,6 @@ public: Status init(const ColumnIteratorOptions& opts) override; - Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) override; - Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override; Status read_by_rowids(const rowid_t* rowids, const size_t count, diff --git a/be/src/olap/types.h b/be/src/olap/types.h index 73a0e4c080..cb71ef2228 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -332,12 +332,6 @@ public: _value_type_info(std::move(value_type_info)) {} ~MapTypeInfo() override = default; - inline bool equal(const void* left, const void* right) const override { - auto l_value = reinterpret_cast<const MapValue*>(left); - auto r_value = reinterpret_cast<const MapValue*>(right); - return l_value->size() == r_value->size(); - } - int cmp(const void* left, const void* right) const override { auto l_value = reinterpret_cast<const MapValue*>(left); auto r_value = reinterpret_cast<const MapValue*>(right); @@ -352,29 +346,14 @@ public: } } - void shallow_copy(void* dest, const void* src) const override { - auto dest_value = reinterpret_cast<MapValue*>(dest); - auto src_value = reinterpret_cast<const MapValue*>(src); - dest_value->shallow_copy(src_value); - } - void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override { DCHECK(false); } - void copy_object(void* dest, const void* src, MemPool* mem_pool) const override { - deep_copy(dest, src, mem_pool); - } - void direct_copy(void* dest, const void* src) const override { CHECK(false); } void direct_copy(uint8_t** base, void* dest, const void* src) const { CHECK(false); } void direct_copy_may_cut(void* dest, const void* src) const override { direct_copy(dest, src); } - Status convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool, - size_t variable_len = 0) const override { - return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>(); - } - Status from_string(void* buf, const std::string& scan_key, const int precision = 0, const int scale = 0) const override { return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>(); @@ -390,17 +369,8 @@ public: DCHECK(false) << "set_to_min of list is not implemented."; } - uint32_t hash_code(const void* data, uint32_t seed) const override { - auto map_value = reinterpret_cast<const MapValue*>(data); - auto size = map_value->size(); - uint32_t result = HashUtil::hash(&size, sizeof(size), seed); - result = seed * result + _key_type_info->hash_code(map_value->key_data(), seed) + - _value_type_info->hash_code(map_value->value_data(), seed); - return result; - } - // todo . is here only to need return 16 for two ptr? - const size_t size() const override { return sizeof(MapValue); } + size_t size() const override { return sizeof(MapValue); } FieldType type() const override { return OLAP_FIELD_TYPE_MAP; } @@ -421,39 +391,6 @@ public: } ~StructTypeInfo() override = default; - bool equal(const void* left, const void* right) const override { - auto l_value = reinterpret_cast<const StructValue*>(left); - auto r_value = reinterpret_cast<const StructValue*>(right); - if (l_value->size() != r_value->size()) { - return false; - } - uint32_t size = l_value->size(); - - if (!l_value->has_null() && !r_value->has_null()) { - for (size_t i = 0; i < size; ++i) { - if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) { - return false; - } - } - } else { - for (size_t i = 0; i < size; ++i) { - if (l_value->is_null_at(i)) { - if (r_value->is_null_at(i)) { // both are null - continue; - } else { // left is null & right is not null - return false; - } - } else if (r_value->is_null_at(i)) { // left is not null & right is null - return false; - } - if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) { - return false; - } - } - } - return true; - } - int cmp(const void* left, const void* right) const override { auto l_value = reinterpret_cast<const StructValue*>(left); auto r_value = reinterpret_cast<const StructValue*>(right); @@ -498,12 +435,6 @@ public: } } - void shallow_copy(void* dest, const void* src) const override { - auto dest_value = reinterpret_cast<StructValue*>(dest); - auto src_value = reinterpret_cast<const StructValue*>(src); - dest_value->shallow_copy(src_value); - } - void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override { auto dest_value = reinterpret_cast<StructValue*>(dest); auto src_value = reinterpret_cast<const StructValue*>(src); @@ -542,10 +473,6 @@ public: } } - void copy_object(void* dest, const void* src, MemPool* mem_pool) const override { - deep_copy(dest, src, mem_pool); - } - void direct_copy(void* dest, const void* src) const override { auto dest_value = static_cast<StructValue*>(dest); auto base = reinterpret_cast<uint8_t*>(dest_value->mutable_values()); @@ -594,11 +521,6 @@ public: void direct_copy_may_cut(void* dest, const void* src) const override { direct_copy(dest, src); } - Status convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool, - size_t variable_len = 0) const override { - return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>(); - } - Status from_string(void* buf, const std::string& scan_key, const int precision = 0, const int scale = 0) const override { return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>(); @@ -627,21 +549,7 @@ public: DCHECK(false) << "set_to_min of list is not implemented."; } - uint32_t hash_code(const void* data, uint32_t seed) const override { - auto struct_value = reinterpret_cast<const StructValue*>(data); - auto size = struct_value->size(); - uint32_t result = HashUtil::hash(&size, sizeof(size), seed); - for (size_t i = 0; i < size; ++i) { - if (struct_value->is_null_at(i)) { - result = seed * result; - } else { - result = seed * result + _type_infos[i]->hash_code(struct_value->values()[i], seed); - } - } - return result; - } - - const size_t size() const override { return sizeof(StructValue); } + size_t size() const override { return sizeof(StructValue); } FieldType type() const override { return OLAP_FIELD_TYPE_STRUCT; } diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp index 6ec225f961..849db2c58d 100644 --- a/be/src/vec/sink/vmysql_result_writer.cpp +++ b/be/src/vec/sink/vmysql_result_writer.cpp @@ -175,11 +175,11 @@ Status VMysqlResultWriter<is_binary_format>::_add_one_column( buf_ret = rows_buffer[i].push_string("NULL", strlen("NULL")); } else { if (WhichDataType(remove_nullable(sub_types[0])).is_string()) { - buf_ret = _buffer.push_string("'", 1); - buf_ret = _add_one_cell(data, j, sub_types[0], _buffer); - buf_ret = _buffer.push_string("'", 1); + buf_ret = rows_buffer[i].push_string("'", 1); + buf_ret = _add_one_cell(data, j, sub_types[0], rows_buffer[i]); + buf_ret = rows_buffer[i].push_string("'", 1); } else { - buf_ret = _add_one_cell(data, j, sub_types[0], _buffer); + buf_ret = _add_one_cell(data, j, sub_types[0], rows_buffer[i]); } } begin = false; @@ -194,14 +194,12 @@ Status VMysqlResultWriter<is_binary_format>::_add_one_column( if (0 != buf_ret) { return Status::InternalError("pack mysql buffer failed."); } - _buffer.reset(); - _buffer.open_dynamic_mode(); + rows_buffer[i].open_dynamic_mode(); std::string cell_str = map_type.to_string(*column, i); - buf_ret = _buffer.push_string(cell_str.c_str(), strlen(cell_str.c_str())); + buf_ret = rows_buffer[i].push_string(cell_str.c_str(), strlen(cell_str.c_str())); - _buffer.close_dynamic_mode(); - result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); + rows_buffer[i].close_dynamic_mode(); } } else if constexpr (type == TYPE_STRUCT) { DCHECK_GE(sub_types.size(), 1); @@ -210,40 +208,37 @@ Status VMysqlResultWriter<is_binary_format>::_add_one_column( if (0 != buf_ret) { return Status::InternalError("pack mysql buffer failed."); } - _buffer.reset(); if constexpr (is_nullable) { if (column_ptr->is_null_at(i)) { - buf_ret = _buffer.push_null(); - result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); + buf_ret = rows_buffer[i].push_null(); continue; } } - _buffer.open_dynamic_mode(); - buf_ret = _buffer.push_string("{", 1); + rows_buffer[i].open_dynamic_mode(); + buf_ret = rows_buffer[i].push_string("{", 1); bool begin = true; for (size_t j = 0; j < sub_types.size(); ++j) { if (!begin) { - buf_ret = _buffer.push_string(", ", 2); + buf_ret = rows_buffer[i].push_string(", ", 2); } const auto& data = column_struct.get_column_ptr(j); if (data->is_null_at(i)) { - buf_ret = _buffer.push_string("NULL", strlen("NULL")); + buf_ret = rows_buffer[i].push_string("NULL", strlen("NULL")); } else { if (WhichDataType(remove_nullable(sub_types[j])).is_string()) { - buf_ret = _buffer.push_string("'", 1); - buf_ret = _add_one_cell(data, i, sub_types[j], _buffer); - buf_ret = _buffer.push_string("'", 1); + buf_ret = rows_buffer[i].push_string("'", 1); + buf_ret = _add_one_cell(data, i, sub_types[j], rows_buffer[i]); + buf_ret = rows_buffer[i].push_string("'", 1); } else { - buf_ret = _add_one_cell(data, i, sub_types[j], _buffer); + buf_ret = _add_one_cell(data, i, sub_types[j], rows_buffer[i]); } } begin = false; } - buf_ret = _buffer.push_string("}", 1); - _buffer.close_dynamic_mode(); - result->result_batch.rows[i].append(_buffer.buf(), _buffer.length()); + buf_ret = rows_buffer[i].push_string("}", 1); + rows_buffer[i].close_dynamic_mode(); } } else if constexpr (type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 || type == TYPE_DECIMAL128I) { @@ -758,12 +753,12 @@ Status VMysqlResultWriter<is_binary_format>::append_block(Block& input_block) { auto& nested_type = assert_cast<const DataTypeNullable&>(*type_ptr).get_nested_type(); auto& sub_types = assert_cast<const DataTypeStruct&>(*nested_type).get_elements(); - status = _add_one_column<PrimitiveType::TYPE_STRUCT, true>(column_ptr, result, - scale, sub_types); + status = _add_one_column<PrimitiveType::TYPE_STRUCT, true>( + column_ptr, result, rows_buffer, scale, sub_types); } else { auto& sub_types = assert_cast<const DataTypeStruct&>(*type_ptr).get_elements(); - status = _add_one_column<PrimitiveType::TYPE_STRUCT, false>(column_ptr, result, - scale, sub_types); + status = _add_one_column<PrimitiveType::TYPE_STRUCT, false>( + column_ptr, result, rows_buffer, scale, sub_types); } break; } @@ -771,11 +766,11 @@ Status VMysqlResultWriter<is_binary_format>::append_block(Block& input_block) { if (type_ptr->is_nullable()) { auto& nested_type = assert_cast<const DataTypeNullable&>(*type_ptr).get_nested_type(); //for map - status = _add_one_column<PrimitiveType::TYPE_MAP, true>(column_ptr, result, scale, - {nested_type}); + status = _add_one_column<PrimitiveType::TYPE_MAP, true>( + column_ptr, result, rows_buffer, scale, {nested_type}); } else { - status = _add_one_column<PrimitiveType::TYPE_MAP, false>(column_ptr, result, scale, - {type_ptr}); + status = _add_one_column<PrimitiveType::TYPE_MAP, false>( + column_ptr, result, rows_buffer, scale, {type_ptr}); } break; } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org