This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new b4a27ed3478 [fix](array_map) fix array_map functions avoid core (#50201) b4a27ed3478 is described below commit b4a27ed3478a8371edae9f78d12203e104f55ab0 Author: amory <wangqian...@selectdb.com> AuthorDate: Wed Apr 23 16:28:16 2025 +0800 [fix](array_map) fix array_map functions avoid core (#50201) This PR is to solve the problem of BE core caused by passing non-array type data to the array_map function. like: ``` start BE in local mode F20250418 16:45:55.593696 2398820 status.h:461] Bad cast from type:doris::vectorized::ColumnStr<unsigned int> to doris::vectorized::ColumnArray *** Check failure stack trace: *** @ 0x5591a896fbc6 google::LogMessage::SendToLog() @ 0x5591a896c610 google::LogMessage::Flush() @ 0x5591a8970409 google::LogMessageFatal::~LogMessageFatal() @ 0x5591539d1f8d doris::Status::FatalError<>() @ 0x559173e5fe56 _ZZ11assert_castIRKN5doris10vectorized11ColumnArrayEL18TypeCheckOnRelease1ERKNS1_7IColumnEET_OT1_ENKUlOS9_E_clIS8_EES4_SC_ @ 0x559173e5f1ae assert_cast<>() @ 0x55918427e98b doris::vectorized::ArrayMapFunction::execute() @ 0x5591842157c2 doris::vectorized::VLambdaFunctionCallExpr::execute() @ 0x5591842b3a65 doris::vectorized::VExprContext::execute() @ 0x5591a6427f1d doris::vectorized::FileScanner::_convert_to_output_block() @ 0x5591a6401a61 doris::vectorized::FileScanner::_get_block_wrapped() @ 0x5591a63ff8d4 doris::vectorized::FileScanner::_get_block_impl() @ 0x55918408c4ce doris::vectorized::Scanner::get_block() @ 0x55918408b037 doris::vectorized::Scanner::get_block_after_projects() @ 0x55918400adb6 doris::vectorized::ScannerScheduler::_scanner_scan() @ 0x559184012509 _ZZZZN5doris10vectorized16ScannerScheduler6submitESt10shared_ptrINS0_14ScannerContextEES2_INS0_8ScanTaskEEENK3$_1clEvENKUlvE_clEvENKUlvE_clEv @ 0x559184011dc0 _ZZZN5doris10vectorized16ScannerScheduler6submitESt10shared_ptrINS0_14ScannerContextEES2_INS0_8ScanTaskEEENK3$_1clEvENKUlvE_clEv @ 0x559184011bcf _ZSt13__invoke_implIvRZZN5doris10vectorized16ScannerScheduler6submitESt10shared_ptrINS1_14ScannerContextEES3_INS1_8ScanTaskEEENK3$_1clEvEUlvE_JEET_St14__invoke_otherOT0_DpOT1_ @ 0x559184011b0f _ZSt10__invoke_rIvRZZN5doris10vectorized16ScannerScheduler6submitESt10shared_ptrINS1_14ScannerContextEES3_INS1_8ScanTaskEEENK3$_1clEvEUlvE_JEENSt9enable_ifIX16is_invocable_r_vIT_T0_DpT1_EESC_E4typeEOSD_DpOSE_ @ 0x5591840116d6 _ZNSt17_Function_handlerIFvvEZZN5doris10vectorized16ScannerScheduler6submitESt10shared_ptrINS2_14ScannerContextEES4_INS2_8ScanTaskEEENK3$_1clEvEUlvE_E9_M_invokeERKSt9_Any_data @ 0x559153aefe1b std::function<>::operator()() @ 0x559184026187 _ZZN5doris10vectorized23SimplifiedScanScheduler16submit_scan_taskENS0_18SimplifiedScanTaskEENKUlvE_clEv @ 0x55918402613f _ZSt13__invoke_implIvRZN5doris10vectorized23SimplifiedScanScheduler16submit_scan_taskENS1_18SimplifiedScanTaskEEUlvE_JEET_St14__invoke_otherOT0_DpOT1_ @ 0x55918402607f _ZSt10__invoke_rIvRZN5doris10vectorized23SimplifiedScanScheduler16submit_scan_taskENS1_18SimplifiedScanTaskEEUlvE_JEENSt9enable_ifIX16is_invocable_r_vIT_T0_DpT1_EES7_E4typeEOS8_DpOS9_ @ 0x559184025bb6 _ZNSt17_Function_handlerIFvvEZN5doris10vectorized23SimplifiedScanScheduler16submit_scan_taskENS2_18SimplifiedScanTaskEEUlvE_E9_M_invokeERKSt9_Any_data @ 0x559153aefe1b std::function<>::operator()() @ 0x55915b47e595 doris::FunctionRunnable::run() ``` --- .../doris/analysis/LambdaFunctionCallExpr.java | 3 + .../data/datatype_p0/complex_types/test.json | 3 + .../complex_types/test_load_with_functions.groovy | 81 ++++++++++++++++++++++ 3 files changed, 87 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java index 707f76f9672..500f0585dc8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java @@ -107,6 +107,9 @@ public class LambdaFunctionCallExpr extends FunctionCallExpr { Expr lastChild = getChild(childSize - 1); for (int i = childSize - 1; i > 0; --i) { argTypes[i] = getChild(i - 1).getType(); + if (!argTypes[i].isArrayType()) { + throw new AnalysisException("array_map function only support array type as input params"); + } this.setChild(i, getChild(i - 1)); } argTypes[0] = lastType; diff --git a/regression-test/data/datatype_p0/complex_types/test.json b/regression-test/data/datatype_p0/complex_types/test.json new file mode 100644 index 00000000000..598a193f1a0 --- /dev/null +++ b/regression-test/data/datatype_p0/complex_types/test.json @@ -0,0 +1,3 @@ +[{"arr": ["a\nb", "a\nb","a\tb","a"b","a\rb","a\bb"]}, +{"arr": ["\\A"]}, +{"arr": ["a"b", "c","d"]}] diff --git a/regression-test/suites/datatype_p0/complex_types/test_load_with_functions.groovy b/regression-test/suites/datatype_p0/complex_types/test_load_with_functions.groovy new file mode 100644 index 00000000000..7194e9ded52 --- /dev/null +++ b/regression-test/suites/datatype_p0/complex_types/test_load_with_functions.groovy @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_load_with_functions") { + sql "DROP TABLE IF EXISTS `test_table`" + sql """ + create table IF NOT EXISTS `test_table` ( + `id` int NULL, + `arr` array<text> NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) distributed by hash(`id`) buckets 1 properties("replication_num" = "1"); + """ + + // curl -v --location-trusted -u root: -H "format:json" -H "strip_outer_array:true" -H "read_json_by_line: true" -H "group_mode: sync_mode" -H "columns:arr=ARRAY_MAP(x -> IFNULL(x, '$'), arr)" -T test.json + streamLoad { + table "test_table" + set 'strip_outer_array', 'true' + set 'read_json_by_line', 'true' + set 'group_mode', 'sync_mode' + set 'columns', 'arr=ARRAY_MAP(x -> IFNULL(x, \'$\'), arr)' + set 'format', 'json' + file "test.json" + time 60 + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("fail", json.Status.toLowerCase()) + } + } + + // test array_map with non-array arg for nereids which should throw exception + // literal + + test { + sql """ + select array_map(x -> x is null, "sss"); + """ + exception "lambda argument must be array" + } + // column + sql """ insert into test_table values(1, ["a", "b", "c"]) """ + sql """ insert into test_table values(2, ["a", "b", "c"]) """ + + + test { + sql """ + select array_map(x -> x is null, id) from test_table; + """ + exception "lambda argument must be array" + } + + + test { + sql """ + select array_map(x -> x is null, arr[0]) from test_table; + """ + exception "lambda argument must be array" + } + +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org