This is an automated email from the ASF dual-hosted git repository. mrhhsg pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 09cd0cc6c3e [fix](load) avoid inserting too large string with type of agg_state (#36117) 09cd0cc6c3e is described below commit 09cd0cc6c3ecdf6a68ee04852149ab3103cbd0da Author: Jerry Hu <mrh...@gmail.com> AuthorDate: Mon Jun 17 09:55:15 2024 +0800 [fix](load) avoid inserting too large string with type of agg_state (#36117) ## Proposed changes <!--Describe your changes.--> --- be/src/vec/sink/vtablet_block_convertor.cpp | 25 +++++++--- .../data/query_p1/test_insert_limit.out | 4 ++ .../suites/query_p1/test_insert_limit.groovy | 58 ++++++++++++++++++++++ 3 files changed, 80 insertions(+), 7 deletions(-) diff --git a/be/src/vec/sink/vtablet_block_convertor.cpp b/be/src/vec/sink/vtablet_block_convertor.cpp index d93a654728d..7f7f4c76008 100644 --- a/be/src/vec/sink/vtablet_block_convertor.cpp +++ b/be/src/vec/sink/vtablet_block_convertor.cpp @@ -209,13 +209,7 @@ Status OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type return !_filter_map[row] && (null_map == nullptr || null_map[j] == 0); }; - switch (type.type) { - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_STRING: { - const auto column_string = - assert_cast<const vectorized::ColumnString*>(real_column_ptr.get()); - + auto string_column_checker = [&](const ColumnString* column_string) { size_t limit = config::string_type_length_soft_limit_bytes; // when type.len is negative, std::min will return overflow value, so we need to check it if (type.len > 0) { @@ -257,6 +251,16 @@ Status OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type } } } + return Status::OK(); + }; + + switch (type.type) { + case TYPE_CHAR: + case TYPE_VARCHAR: + case TYPE_STRING: { + const auto column_string = + assert_cast<const vectorized::ColumnString*>(real_column_ptr.get()); + RETURN_IF_ERROR(string_column_checker(column_string)); break; } case TYPE_JSONB: { @@ -418,6 +422,13 @@ Status OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type } break; } + case TYPE_AGG_STATE: { + auto* column_string = vectorized::check_and_get_column<ColumnString>(*real_column_ptr); + if (column_string) { + RETURN_IF_ERROR(string_column_checker(column_string)); + } + break; + } default: break; } diff --git a/regression-test/data/query_p1/test_insert_limit.out b/regression-test/data/query_p1/test_insert_limit.out new file mode 100644 index 00000000000..b3620441e4f --- /dev/null +++ b/regression-test/data/query_p1/test_insert_limit.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select -- +1 1310728 + diff --git a/regression-test/suites/query_p1/test_insert_limit.groovy b/regression-test/suites/query_p1/test_insert_limit.groovy new file mode 100644 index 00000000000..d2d11b1a28d --- /dev/null +++ b/regression-test/suites/query_p1/test_insert_limit.groovy @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_insert_limit") { + sql "set enable_agg_state=true" + sql "DROP TABLE IF EXISTS `tbl_test_insert_limit`" + sql """ + create table `tbl_test_insert_limit`( + k1 int null, + k2 agg_state<group_concat(string)> generic + ) + aggregate key (k1) + distributed BY hash(k1) buckets 3 + properties("replication_num" = "1"); + """ + + sql """ + insert into `tbl_test_insert_limit` values(1, group_concat_state('abcd')); + """ + + def error = "" + for (i in 1..24) { + test { + sql " insert into `tbl_test_insert_limit` select * from tbl_test_insert_limit; " + check{result, exception, startTime, endTime -> + if (exception != null) { + error = exception + } + } + } + + if (error != "") { + break + } + } + + assertTrue(error != "") + + qt_select """ + select k1, length(k2) from `tbl_test_insert_limit`; + """ + + sql "DROP TABLE IF EXISTS `tbl_test_insert_limit`" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org