This is an automated email from the ASF dual-hosted git repository.

xuyang pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push: new 3e14c984f1c [branch-1.2](bug) fix unexpected be core in string search function (#31411) 3e14c984f1c is described below commit 3e14c984f1c0fac89aae2f90654bb87b0cd51843 Author: xy720 <22125576+xy...@users.noreply.github.com> AuthorDate: Mon Feb 26 20:18:23 2024 +0800 [branch-1.2](bug) fix unexpected be core in string search function (#31411) cherry-pick #31312 --- .../functions/functions_multi_string_position.cpp | 15 +++++++------ .../functions/functions_multi_string_search.cpp | 17 +++++++------- .../apache/doris/analysis/FunctionCallExpr.java | 4 ++++ .../java/org/apache/doris/catalog/FunctionSet.java | 6 +++++ .../test_multi_string_position.groovy | 12 ++++++++++ .../test_multi_string_search.groovy | 26 +++++++++++++++++++++- 6 files changed, 64 insertions(+), 16 deletions(-) diff --git a/be/src/vec/functions/functions_multi_string_position.cpp b/be/src/vec/functions/functions_multi_string_position.cpp index 69e84ec4b3f..27f29776eaf 100644 --- a/be/src/vec/functions/functions_multi_string_position.cpp +++ b/be/src/vec/functions/functions_multi_string_position.cpp @@ -55,26 +55,22 @@ public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { auto haystack_column = block.get_by_position(arguments[0]).column; - auto haystack_ptr = haystack_column; - auto needles_column = block.get_by_position(arguments[1]).column; - auto needles_ptr = needles_column; bool haystack_nullable = false; bool needles_nullable = false; if (haystack_column->is_nullable()) { - haystack_ptr = check_and_get_column<ColumnNullable>(haystack_column.get()) - ->get_nested_column_ptr(); haystack_nullable = true; } if (needles_column->is_nullable()) { - needles_ptr = check_and_get_column<ColumnNullable>(needles_column.get()) - ->get_nested_column_ptr(); needles_nullable = true; } + auto haystack_ptr = 
remove_nullable(haystack_column); + auto needles_ptr = remove_nullable(needles_column); + const ColumnString* col_haystack_vector = check_and_get_column<ColumnString>(&*haystack_ptr); const ColumnConst* col_haystack_const = @@ -85,6 +81,11 @@ public: const ColumnConst* col_needles_const = check_and_get_column_const<ColumnArray>(needles_ptr.get()); + if (!col_needles_const && !col_needles_vector) + return Status::InvalidArgument( + "function '{}' encountered unsupported needles column, found {}", name, + needles_column->get_name()); + if (col_haystack_const && col_needles_vector) { return Status::InvalidArgument( "function '{}' doesn't support search with non-constant needles " diff --git a/be/src/vec/functions/functions_multi_string_search.cpp b/be/src/vec/functions/functions_multi_string_search.cpp index 7a3d842cfbe..04c73375cad 100644 --- a/be/src/vec/functions/functions_multi_string_search.cpp +++ b/be/src/vec/functions/functions_multi_string_search.cpp @@ -58,26 +58,22 @@ public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { auto haystack_column = block.get_by_position(arguments[0]).column; - auto haystack_ptr = haystack_column; - auto needles_column = block.get_by_position(arguments[1]).column; - auto needles_ptr = needles_column; bool haystack_nullable = false; bool needles_nullable = false; if (haystack_column->is_nullable()) { - haystack_ptr = check_and_get_column<ColumnNullable>(haystack_column.get()) - ->get_nested_column_ptr(); haystack_nullable = true; } if (needles_column->is_nullable()) { - needles_ptr = check_and_get_column<ColumnNullable>(needles_column.get()) - ->get_nested_column_ptr(); needles_nullable = true; } + auto haystack_ptr = remove_nullable(haystack_column); + auto needles_ptr = remove_nullable(needles_column); + const ColumnString* col_haystack_vector = check_and_get_column<ColumnString>(&*haystack_ptr); const ColumnConst* col_haystack_const 
= @@ -88,6 +84,11 @@ public: const ColumnConst* col_needles_const = check_and_get_column_const<ColumnArray>(needles_ptr.get()); + if (!col_needles_const && !col_needles_vector) + return Status::InvalidArgument( + "function '{}' encountered unsupported needles column, found {}", name, + needles_column->get_name()); + if (col_haystack_const && col_needles_vector) return Status::InvalidArgument( "function '{}' doesn't support search with non-constant needles " @@ -323,4 +324,4 @@ void register_function_multi_string_search(SimpleFunctionFactory& factory) { factory.register_function<FunctionMultiMatchAny>(); } -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 45ae5751025..568d6e32de0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -78,6 +78,10 @@ public class FunctionCallExpr extends Expr { .add("round").add("round_bankers").add("ceil").add("floor") .add("truncate").add("dround").add("dceil").add("dfloor").build(); + public static final ImmutableSet<String> STRING_SEARCH_FUNCTION_SET = new ImmutableSortedSet.Builder( + String.CASE_INSENSITIVE_ORDER) + .add("multi_search_all_positions").add("multi_match_any").build(); + static { java.util.function.BiFunction<ArrayList<Expr>, Type, Type> sumRule = (children, returnType) -> { Preconditions.checkArgument(children != null && children.size() > 0); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index af5600078eb..af27685bce4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -1290,6 +1290,12 @@ 
public class FunctionSet<T> { return false; } } + if (FunctionCallExpr.STRING_SEARCH_FUNCTION_SET.contains(desc.functionName())) { + if (descArgTypes[1].isStringType() && candicateArgTypes[1].isArrayType()) { + // The needles arg of search functions should not be allowed to cast from string. + return false; + } + } // If set `roundPreciseDecimalV2Value`, only use decimalv3 as target type to execute round function if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().roundPreciseDecimalV2Value diff --git a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy index 28cc08efb88..86d207b9d74 100644 --- a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy +++ b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy @@ -28,4 +28,16 @@ suite("test_multi_string_position") { qt_select "select multi_search_all_positions('jnckhtjqwycyihuejibqmddrdxe', ['tajzx', 'vuddoylclxatcjvinusdwt', 'spxkhxvzsljkmnzpeubszjnhqczavgtqopxn', 'ckhtjqwycyi', 'xlbfzdxspldoes', 'u', 'czosfebeznt', 'gzhabdsuyreisxvyfrfrkq', 'yihuejibqmd', 'jqwycyihuejibqm', 'cfbvprgzx', 'hxu', 'vxbhrfpzacgd', 'afoaij', 'htjqwycyihu', 'httzbskqd'])" qt_select "select multi_search_all_positions('dzejajvpoojdkqbnayahygidyrjmb', ['khwxxvtnqhobbvwgwkpusjlhlzifiuclycml', 'nzvuhtwdaivo', 'dkqbnayahygidyr', 'jajvpoo', 'j', 'wdtbvwmeqgyvetu', 'kqbn', 'idyrjmb', 'tsnxuxevsxrxpgpfdgrkhwqpkse', '', 'efsdgzuefhdzkmquxu', 'zejajvpoojdkqbnayahyg', 'ugwfuighbygrxyctop', 'fcbxzbdugc', 'dxmzzrcplob', 'ejaj', 'wmmupyxrylvawsyfccluiiene', 'ohzmsqhpzbafvbzqwzftbvftei'])" qt_select "select multi_search_all_positions('ffaujlverosspbzaqefjzql', ['lvero', 'erossp', 'f', 'ujlverosspbz', 'btfimgklzzxlbkbuqyrmnud', 'osspb', 'muqexvtjuaar', 'f', 'bzaq', 'lprihswhwkdhqciqhfaowarn', 
'ffaujlve', 'uhbbjrqjb', 'jlver', 'umucyhbbu', 'pjthtzmgxhvpbdphesnnztuu', 'xfqhfdfsbbazactpastzvzqudgk', 'lvovjfoatc', 'z', 'givejzhoqsd', ''])" + + try { + sql "select multi_search_all_positions('ffaujlverosspbzaqefjzql', 'lvero, erossp, f, ujlverosspbz, btfimgklzzxlbkbuqyrmnud, osspb, muqexvtjuaar, f, bzaq, lprihswhwkdhqciqhfaowarn, ffaujlve, uhbbjrqjb, jlver, umucyhbbu, pjthtzmgxhvpbdphesnnztuu, xfqhfdfsbbazactpastzvzqudgk, lvovjfoatc, z, givejzhoqsd')" + } catch (Exception ex) { + assert("${ex}".contains("errCode = 2, detailMessage = No matching function with signature: multi_search_all_positions")) + } + + try { + sql "select multi_search_all_positions('ffaujlverosspbzaqefjzql', '[lvero, erossp, f, ujlverosspbz, btfimgklzzxlbkbuqyrmnud, osspb, muqexvtjuaar, f, bzaq, lprihswhwkdhqciqhfaowarn, ffaujlve, uhbbjrqjb, jlver, umucyhbbu, pjthtzmgxhvpbdphesnnztuu, xfqhfdfsbbazactpastzvzqudgk, lvovjfoatc, z, givejzhoqsd]')" + } catch (Exception ex) { + assert("${ex}".contains("errCode = 2, detailMessage = No matching function with signature: multi_search_all_positions")) + } } diff --git a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy index 5a3229ce361..3a0a9b061fe 100644 --- a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy +++ b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy @@ -71,4 +71,28 @@ suite("test_multi_string_search") { qt_select "select multi_match_any('ldrzgttlqaphekkkdukgngl', ['gttlqaphekkkdukgn', 'ekkkd', 'gttlqaphe', 'qaphek', 'h', 'kdu', 'he', 'phek', '', 'drzgttlqaphekkkd'])" qt_select "select multi_match_any('ololo', ['ololo', 'ololo', 'ololo'])" qt_select "select multi_match_any('khljxzxlpcrxpkrfybbfk', ['k'])" -} \ No newline at end of file + + try { + sql "select multi_match_any(content, 'hello') from 
${table_name} order by col1" + } catch (Exception ex) { + assert("${ex}".contains("errCode = 2, detailMessage = No matching function with signature: multi_match_any")) + } + + try { + sql "select multi_match_any(content, 'hello, !, world, Hello, World') from ${table_name} order by col1" + } catch (Exception ex) { + assert("${ex}".contains("errCode = 2, detailMessage = No matching function with signature: multi_match_any")) + } + + try { + sql "select multi_match_any(content, '[hello]') from ${table_name} order by col1" + } catch (Exception ex) { + assert("${ex}".contains("errCode = 2, detailMessage = No matching function with signature: multi_match_any")) + } + + try { + sql "select multi_match_any(content, '[hello, !, world, Hello, World]') from ${table_name} order by col1" + } catch (Exception ex) { + assert("${ex}".contains("errCode = 2, detailMessage = No matching function with signature: multi_match_any")) + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org