This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 536d8ca1ed [Bug][Vectorized] Fix insert bimmap column with nullable column (#9408) 536d8ca1ed is described below commit 536d8ca1ed2c5897859fd6d029555d1bbf8ddad2 Author: HappenLee <happen...@hotmail.com> AuthorDate: Tue May 17 14:42:20 2022 +0800 [Bug][Vectorized] Fix insert bimmap column with nullable column (#9408) Co-authored-by: lihaopeng <lihaop...@baidu.com> --- be/src/vec/sink/vtablet_sink.cpp | 215 +++++++++++++++---------------- be/src/vec/sink/vtablet_sink.h | 6 +- regression-test/suites/query/load.groovy | 22 ++++ 3 files changed, 132 insertions(+), 111 deletions(-) diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp index a86ef57cf2..16edd09e77 100644 --- a/be/src/vec/sink/vtablet_sink.cpp +++ b/be/src/vec/sink/vtablet_sink.cpp @@ -393,6 +393,7 @@ Status VOlapTableSink::send(RuntimeState* state, vectorized::Block* input_block) // because of "data unqualified" return Status::EndOfFile("Encountered unqualified data, stop processing"); } + _convert_to_dest_desc_block(&block); } BlockRow block_row; @@ -474,132 +475,105 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl block->get_by_position(i).column->convert_to_full_column_if_const(); const auto& column = block->get_by_position(i).column; - if (desc->type() == TYPE_OBJECT && column->is_nullable()) { - const auto& null_map = - vectorized::check_and_get_column<vectorized::ColumnNullable>(*column) - ->get_null_map_data(); - fmt::format_to(error_msg, "null is not allowed for bitmap column, column_name: {}; ", - desc->col_name()); + auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column); + auto& real_column_ptr = + column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr()); + switch (desc->type().type) { + case TYPE_CHAR: + case TYPE_VARCHAR: + case TYPE_STRING: { + const auto column_string = + assert_cast<const vectorized::ColumnString*>(real_column_ptr.get()); + + size_t limit = std::min(config::string_type_length_soft_limit_bytes, desc->type().len); for (int j = 0; j < num_rows; ++j) { if (!filter_bitmap->Get(j)) { - if (null_map[j]) { - RETURN_IF_ERROR(set_invalid_and_append_error_msg(j)); + auto str_val = column_string->get_data_at(j); + bool invalid = str_val.size > limit; + + error_msg.clear(); + if (str_val.size > desc->type().len) { + fmt::format_to(error_msg, "{}", + "the length of input is too long than schema. "); + fmt::format_to(error_msg, "column_name: {}; ", desc->col_name()); + fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10)); + fmt::format_to(error_msg, "schema length: {}; ", desc->type().len); + fmt::format_to(error_msg, "actual length: {}; ", str_val.size); + } else if (str_val.size > limit) { + fmt::format_to(error_msg, "{}", + "the length of input string is too long than vec schema. "); + fmt::format_to(error_msg, "column_name: {}; ", desc->col_name()); + fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10)); + fmt::format_to(error_msg, "schema length: {}; ", desc->type().len); + fmt::format_to(error_msg, "limit length: {}; ", limit); + fmt::format_to(error_msg, "actual length: {}; ", str_val.size); } - } - } - } else { - auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column); - auto& real_column_ptr = - column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr()); - - switch (desc->type().type) { - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_STRING: { - const auto column_string = - assert_cast<const vectorized::ColumnString*>(real_column_ptr.get()); - - size_t limit = - std::min(config::string_type_length_soft_limit_bytes, desc->type().len); - for (int j = 0; j < num_rows; ++j) { - if (!filter_bitmap->Get(j)) { - auto str_val = column_string->get_data_at(j); - bool invalid = str_val.size > limit; - - error_msg.clear(); - if (str_val.size > desc->type().len) { - fmt::format_to(error_msg, "{}", - "the length of input is too long than schema. "); - fmt::format_to(error_msg, "column_name: {}; ", desc->col_name()); - fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10)); - fmt::format_to(error_msg, "schema length: {}; ", desc->type().len); - fmt::format_to(error_msg, "actual length: {}; ", str_val.size); - } else if (str_val.size > limit) { - fmt::format_to( - error_msg, "{}", - "the length of input string is too long than vec schema. "); - fmt::format_to(error_msg, "column_name: {}; ", desc->col_name()); - fmt::format_to(error_msg, "input str: [{}] ", str_val.to_prefix(10)); - fmt::format_to(error_msg, "schema length: {}; ", desc->type().len); - fmt::format_to(error_msg, "limit length: {}; ", limit); - fmt::format_to(error_msg, "actual length: {}; ", str_val.size); - } - if (invalid) { - RETURN_IF_ERROR(set_invalid_and_append_error_msg(j)); - } + if (invalid) { + RETURN_IF_ERROR(set_invalid_and_append_error_msg(j)); } } - break; } - case TYPE_DECIMALV2: { - auto column_decimal = const_cast< - vectorized::ColumnDecimal<vectorized::Decimal128>*>( - assert_cast<const vectorized::ColumnDecimal<vectorized::Decimal128>*>( - real_column_ptr.get())); - - for (int j = 0; j < num_rows; ++j) { - if (!filter_bitmap->Get(j)) { - auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>( - column_decimal->get_data()[j]); - error_msg.clear(); - bool invalid = false; - - if (dec_val.greater_than_scale(desc->type().scale)) { - auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP); - column_decimal->get_data()[j] = - binary_cast<DecimalV2Value, vectorized::Int128>(dec_val); - - if (code != E_DEC_OK) { - fmt::format_to(error_msg, "round one decimal failed.value={}; ", - dec_val.to_string()); - invalid = true; - } - } - if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) { - fmt::format_to(error_msg, - "decimal value is not valid for definition, column={}", - desc->col_name()); - fmt::format_to(error_msg, ", value={}", dec_val.to_string()); - fmt::format_to(error_msg, ", precision={}, scale={}; ", - desc->type().precision, desc->type().scale); + break; + } + case TYPE_DECIMALV2: { + auto column_decimal = const_cast<vectorized::ColumnDecimal<vectorized::Decimal128>*>( + assert_cast<const vectorized::ColumnDecimal<vectorized::Decimal128>*>( + real_column_ptr.get())); + + for (int j = 0; j < num_rows; ++j) { + if (!filter_bitmap->Get(j)) { + auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>( + column_decimal->get_data()[j]); + error_msg.clear(); + bool invalid = false; + + if (dec_val.greater_than_scale(desc->type().scale)) { + auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP); + column_decimal->get_data()[j] = + binary_cast<DecimalV2Value, vectorized::Int128>(dec_val); + + if (code != E_DEC_OK) { + fmt::format_to(error_msg, "round one decimal failed.value={}; ", + dec_val.to_string()); invalid = true; } + } + if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) { + fmt::format_to(error_msg, + "decimal value is not valid for definition, column={}", + desc->col_name()); + fmt::format_to(error_msg, ", value={}", dec_val.to_string()); + fmt::format_to(error_msg, ", precision={}, scale={}; ", + desc->type().precision, desc->type().scale); + invalid = true; + } - if (invalid) { - RETURN_IF_ERROR(set_invalid_and_append_error_msg(j)); - } + if (invalid) { + RETURN_IF_ERROR(set_invalid_and_append_error_msg(j)); } } - break; - } - default: - break; - } - - // Dispose the nullable column not match problem here, convert to nullable column - if (desc->is_nullable() && !column_ptr) { - block->get_by_position(i).column = vectorized::make_nullable(column); - block->get_by_position(i).type = - vectorized::make_nullable(block->get_by_position(i).type); } + break; + } + default: + break; + } - // Dispose the nullable column not match problem here, convert to not nullable column - if (!desc->is_nullable() && column_ptr) { - const auto& null_map = column_ptr->get_null_map_data(); - for (int j = 0; j < null_map.size(); ++j) { - fmt::format_to(error_msg, "null value for not null column, column={}; ", - desc->col_name()); - if (null_map[j] && !filter_bitmap->Get(j)) { - RETURN_IF_ERROR(set_invalid_and_append_error_msg(j)); - } + // Dispose the the column should do not contain the NULL value + // Only tow case: + // 1. column is nullable but the desc is not nullable + // 2. desc->type is BITMAP + if ((!desc->is_nullable() || desc->type() == TYPE_OBJECT) && column_ptr) { + const auto& null_map = column_ptr->get_null_map_data(); + for (int j = 0; j < null_map.size(); ++j) { + fmt::format_to(error_msg, + "null value for not null column/or bitmap column, column={}; ", + desc->col_name()); + if (null_map[j] && !filter_bitmap->Get(j)) { + RETURN_IF_ERROR(set_invalid_and_append_error_msg(j)); } - block->get_by_position(i).column = column_ptr->get_nested_column_ptr(); - block->get_by_position(i).type = - (reinterpret_cast<const vectorized::DataTypeNullable*>( - block->get_by_position(i).type.get())) - ->get_nested_type(); } } } @@ -611,5 +585,26 @@ Status VOlapTableSink::_validate_data(RuntimeState* state, vectorized::Block* bl return Status::OK(); } +void VOlapTableSink::_convert_to_dest_desc_block(doris::vectorized::Block* block) { + for (int i = 0; i < _output_tuple_desc->slots().size(); ++i) { + SlotDescriptor* desc = _output_tuple_desc->slots()[i]; + if (desc->is_nullable() != block->get_by_position(i).type->is_nullable()) { + if (desc->is_nullable()) { + block->get_by_position(i).type = + vectorized::make_nullable(block->get_by_position(i).type); + block->get_by_position(i).column = + vectorized::make_nullable(block->get_by_position(i).column); + } else { + block->get_by_position(i).type = assert_cast<const vectorized::DataTypeNullable&>( + *block->get_by_position(i).type) + .get_nested_type(); + block->get_by_position(i).column = assert_cast<const vectorized::ColumnNullable&>( + *block->get_by_position(i).column) + .get_nested_column_ptr(); + } + } + } +} + } // namespace stream_load } // namespace doris diff --git a/be/src/vec/sink/vtablet_sink.h b/be/src/vec/sink/vtablet_sink.h index 08b3f54434..65441d8069 100644 --- a/be/src/vec/sink/vtablet_sink.h +++ b/be/src/vec/sink/vtablet_sink.h @@ -102,9 +102,13 @@ private: Status _validate_data(RuntimeState* state, vectorized::Block* block, Bitmap* filter_bitmap, int* filtered_rows, bool* stop_processing); + // some output column of output expr may have different nullable property with dest slot desc + // so here need to do the convert operation + void _convert_to_dest_desc_block(vectorized::Block* block); + VOlapTablePartitionParam* _vpartition = nullptr; std::vector<vectorized::VExprContext*> _output_vexpr_ctxs; }; } // namespace stream_load -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/regression-test/suites/query/load.groovy b/regression-test/suites/query/load.groovy index 93bf46be60..75e01c8c41 100644 --- a/regression-test/suites/query/load.groovy +++ b/regression-test/suites/query/load.groovy @@ -67,6 +67,28 @@ suite("load") { } sql "insert into ${dbName}.test select * from ${dbName}.baseall where k1 <= 3" + // table for compaction + sql """ + CREATE TABLE compaction_tbl + ( + user_id LARGEINT NOT NULL, + date DATE NOT NULL, + city VARCHAR(20), + age SMALLINT, + sex TINYINT, + last_visit_date DATETIME REPLACE DEFAULT "1970-01-01 00:00:00", + last_update_date DATETIME REPLACE_IF_NOT_NULL DEFAULT "1970-01-01 00:00:00", + last_visit_date_not_null DATETIME REPLACE NOT NULL DEFAULT "1970-01-01 00:00:00", + cost BIGINT SUM DEFAULT "0", + max_dwell_time INT MAX DEFAULT "0", + min_dwell_time INT MIN DEFAULT "99999", + hll_col HLL HLL_UNION NOT NULL, + bitmap_col Bitmap BITMAP_UNION NOT NULL + ) AGGREGATE KEY(user_id, date, city, age, sex) + DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");""" + + sql """insert into compaction_tbl values(123,"1999-10-10",'aaa',123,123,"1970-01-01 00:00:00","1970-01-01 00:00:00","1970-01-01 00:00:00",123,123,123,hll_hash(""),bitmap_from_string(""));""" + def baseall_count = sql "select count(*) from ${dbName}.baseall" assertEquals(16, baseall_count[0][0]) def test_count = sql "select count(*) from ${dbName}.test" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org