This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new adfef1d790e [fix](ES Catalog)Check isArray before parse json to array (#39104) (#39275) adfef1d790e is described below commit adfef1d790e2474fabad98a68f8bceaa1ebdec73 Author: qiye <jianliang5...@gmail.com> AuthorDate: Wed Aug 14 15:21:27 2024 +0800 [fix](ES Catalog)Check isArray before parse json to array (#39104) (#39275) --- be/src/exec/es/es_scroll_parser.cpp | 25 +++++----- .../scripts/data/composite_type_array_bulk.json | 10 ++++ .../elasticsearch/scripts/es_init.sh | 58 ++++++++++++++++++++++ .../index/array_meta_composite_type_array.json | 9 ++++ .../scripts/index/es6_composite_type_array.json | 14 ++++++ .../scripts/index/es7_composite_type_array.json | 12 +++++ .../external_table_p0/es/test_es_query.groovy | 25 ++++++++++ 7 files changed, 141 insertions(+), 12 deletions(-) diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index 138bbb81bc0..22c9d3ee337 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -100,9 +100,9 @@ static const std::string ERROR_COL_DATA_IS_ARRAY = static const std::string INVALID_NULL_VALUE = "Invalid null value occurs: Non-null column `$0` contains NULL"; -#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type) \ +#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type, is_array) \ do { \ - if (col.IsArray()) { \ + if (col.IsArray() == is_array) { \ std::stringstream ss; \ ss << "Expected value of type: " << type_to_string(type) \ << "; but found type: " << json_type_to_string(col.GetType()) \ @@ -167,7 +167,7 @@ Status get_int_value(const rapidjson::Value& col, PrimitiveType type, void* slot return Status::OK(); } - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); StringParser::ParseResult result; @@ -294,7 +294,7 @@ Status get_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_d return get_date_value_int<T, RT>(col[0], type, false, slot, time_zone); } else { // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); return get_date_value_int<T, RT>(col, type, true, slot, time_zone); } @@ -322,7 +322,7 @@ Status get_float_value(const rapidjson::Value& col, PrimitiveType type, void* sl return Status::OK(); } - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); StringParser::ParseResult result; @@ -351,7 +351,7 @@ Status insert_float_value(const rapidjson::Value& col, PrimitiveType type, return Status::OK(); } - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); StringParser::ParseResult result; @@ -390,7 +390,7 @@ Status insert_int_value(const rapidjson::Value& col, PrimitiveType type, return Status::OK(); } - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); StringParser::ParseResult result; @@ -543,7 +543,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, val = col[0].GetString(); } } else { - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); if (!col.IsString()) { val = json_value_to_string(col); } else { @@ -618,7 +618,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, const rapidjson::Value& str_col = is_nested_str ? col[0] : col; - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); const std::string& val = str_col.GetString(); size_t val_size = str_col.GetStringLength(); @@ -644,7 +644,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, val = col[0].GetString(); } } else { - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); if (!col.IsString()) { val = json_value_to_string(col); } else { @@ -676,13 +676,14 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, case TYPE_ARRAY: { vectorized::Array array; const auto& sub_type = tuple_desc->slots()[i]->type().children[0].type; - for (auto& sub_col : col.GetArray()) { + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, false); + for (const auto& sub_col : col.GetArray()) { switch (sub_type) { case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_STRING: { std::string val; - RETURN_ERROR_IF_COL_IS_ARRAY(sub_col, sub_type); + RETURN_ERROR_IF_COL_IS_ARRAY(sub_col, sub_type, true); if (!sub_col.IsString()) { val = json_value_to_string(sub_col); } else { diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/composite_type_array_bulk.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/composite_type_array_bulk.json new file mode 100755 index 00000000000..8654ab25e6f --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/composite_type_array_bulk.json @@ -0,0 +1,10 @@ +{"name": "Andy", "sports": "soccer"} +{"name": "Betty", "sports": "pingpong ball"} +{"name": "Cindy", "sports": "武术"} +{"name": "David", "sports": ["volleyball"]} +{"name": "Emily", "sports": ["baseball", "golf", "hockey"]} +{"name": "Frank", "sports": ["rugby", "cricket", "boxing"]} +{"name": "Grace", "sports": ["table tennis", "badminton", "athletics"]} +{"name": "Henry", "sports": ["archery", "fencing", "weightlifting"]} +{"name": "Ivy", "sports": ["judo", "karate", "taekwondo"]} +{"name": "Jack", "sports": ["wrestling", "gymnastics", "surfing"]} \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh b/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh index 575118d8db9..258c6c83ef5 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh @@ -16,6 +16,30 @@ # specific language governing permissions and limitations # under the License. +generate_bulk_request() { + local index_name=$1 + local type_value=$2 + local id_prefix=$3 + local data_file=$4 + local output_file=$5 + + // clear output file + echo "" > "$output_file" + + local id=1 + while IFS= read -r line; do + if [ -n "$type_value" ]; then + echo "{\"index\": {\"_index\": \"$index_name\", \"_type\": \"$type_value\", \"_id\": \"${id_prefix}${id}\"}}" >> "$output_file" + else + echo "{\"index\": {\"_index\": \"$index_name\", \"_id\": \"${id_prefix}${id}\"}}" >> "$output_file" + fi + echo "$line" >> "$output_file" + id=$((id + 1)) + done < "$data_file" +} + +array_data_file="/mnt/scripts/data/composite_type_array_bulk.json" + # es 5 # create index test1 # shellcheck disable=SC2154 @@ -43,6 +67,14 @@ curl "http://${ES_5_HOST}:9200/test2_20220808/doc/_mapping" -H "Content-Type:app curl "http://${ES_5_HOST}:9200/test2_20220809/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" # create index .hide curl "http://${ES_5_HOST}:9200/.hide" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_hide.json" +# create index composite_type_array +curl "http://${ES_5_HOST}:9200/composite_type_array" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_composite_type_array.json" +# put data with bulk for composite_type_array +bulk_request_file="/mnt/scripts/data/bulk_request_es5.json" +generate_bulk_request "composite_type_array" "doc" "item_" "$array_data_file" "$bulk_request_file" +curl -X POST "http://${ES_5_HOST}:9200/_bulk" --data-binary "@$bulk_request_file" -H "Content-Type: application/json" +# put _meta for composite_type_array +curl "http://${ES_5_HOST}:9200/composite_type_array/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta_composite_type_array.json" # es 6 # create index test1 @@ -70,6 +102,14 @@ curl "http://${ES_6_HOST}:9200/test2_20220808/doc/_mapping" -H "Content-Type:app curl "http://${ES_6_HOST}:9200/test2_20220809/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" # create index .hide curl "http://${ES_6_HOST}:9200/.hide" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_hide.json" +# create index composite_type_array +curl "http://${ES_6_HOST}:9200/composite_type_array" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_composite_type_array.json" +# put data with bulk for composite_type_array +bulk_request_file="/mnt/scripts/data/bulk_request_es6.json" +generate_bulk_request "composite_type_array" "doc" "item_" "$array_data_file" "$bulk_request_file" +curl -X POST "http://${ES_6_HOST}:9200/_bulk" --data-binary "@$bulk_request_file" -H "Content-Type: application/json" +# put _meta for composite_type_array +curl "http://${ES_6_HOST}:9200/composite_type_array/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta_composite_type_array.json" # es7 # create index test1 @@ -106,6 +146,15 @@ curl "http://${ES_7_HOST}:9200/test2_20220809/_mapping" -H "Content-Type:applica # create index .hide curl "http://${ES_7_HOST}:9200/.hide" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_hide.json" +# create index composite_type_array +curl "http://${ES_7_HOST}:9200/composite_type_array" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_composite_type_array.json" +# put data with bulk for composite_type_array +bulk_request_file="/mnt/scripts/data/bulk_request_es7.json" +generate_bulk_request "composite_type_array" "_doc" "item_" "$array_data_file" "$bulk_request_file" +curl -X POST "http://${ES_7_HOST}:9200/_bulk" --data-binary "@$bulk_request_file" -H "Content-Type: application/json" +# put _meta for composite_type_array +curl "http://${ES_7_HOST}:9200/composite_type_array/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta_composite_type_array.json" + # es8 # create index test1 curl "http://${ES_8_HOST}:9200/test1" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_test1.json" @@ -138,3 +187,12 @@ curl "http://${ES_8_HOST}:9200/test3_20231005/_doc/1" -H "Content-Type:applicati curl "http://${ES_8_HOST}:9200/test1/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" curl "http://${ES_8_HOST}:9200/test2_20220808/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" curl "http://${ES_8_HOST}:9200/test2_20220809/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" + +# create index composite_type_array +curl "http://${ES_8_HOST}:9200/composite_type_array" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_composite_type_array.json" +# put data with bulk for composite_type_array +bulk_request_file="/mnt/scripts/data/bulk_request_es8.json" +generate_bulk_request "composite_type_array" "" "item_" "$array_data_file" "$bulk_request_file" +curl -X POST "http://${ES_8_HOST}:9200/_bulk" --data-binary "@$bulk_request_file" -H "Content-Type: application/json" +# put _meta for composite_type_array +curl "http://${ES_8_HOST}:9200/composite_type_array/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta_composite_type_array.json" \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/array_meta_composite_type_array.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/array_meta_composite_type_array.json new file mode 100644 index 00000000000..9c55390149d --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/array_meta_composite_type_array.json @@ -0,0 +1,9 @@ +{ + "_meta": { + "doris":{ + "array_fields":[ + "sports" + ] + } + } +} diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es6_composite_type_array.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es6_composite_type_array.json new file mode 100755 index 00000000000..2921628cf38 --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es6_composite_type_array.json @@ -0,0 +1,14 @@ +{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0 + }, + "mappings": { + "doc": { + "properties": { + "name": { "type": "keyword" }, + "sports": { "type": "keyword", "doc_values": false} + } + } + } +} diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_composite_type_array.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_composite_type_array.json new file mode 100644 index 00000000000..8fdd6c88ce7 --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_composite_type_array.json @@ -0,0 +1,12 @@ +{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0 + }, + "mappings": { + "properties": { + "name": { "type": "keyword" }, + "sports": { "type": "keyword", "doc_values": false} + } + } +} diff --git a/regression-test/suites/external_table_p0/es/test_es_query.groovy b/regression-test/suites/external_table_p0/es/test_es_query.groovy index b3b852831de..6d3c87a7455 100644 --- a/regression-test/suites/external_table_p0/es/test_es_query.groovy +++ b/regression-test/suites/external_table_p0/es/test_es_query.groovy @@ -214,6 +214,13 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql_5_18 """select message from test1 where not_null_or_empty(message)""" order_qt_sql_5_19 """select * from test1 where esquery(c_unsigned_long, '{"match":{"c_unsigned_long":0}}')""" order_qt_sql_5_20 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;""" + try { + sql """select * from composite_type_array;""" + fail("Should not reach here") + } catch (Exception e) { + logger.error(e.getMessage()) + assertTrue(e.getMessage().contains("Expected value of type: ARRAY; but found type: Varchar/Char; Document slice is : \"soccer\"")) + } sql """switch test_es_query_es6""" // order_qt_sql_6_01 """show tables""" @@ -236,6 +243,12 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql_6_18 """select message from test1 where not_null_or_empty(message)""" order_qt_sql_6_19 """select * from test1 where esquery(c_person, '{"match":{"c_person.name":"Andy"}}')""" order_qt_sql_6_20 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;""" + try { + sql """select * from composite_type_array;""" + fail("Should not reach here") + } catch (Exception e) { + assertTrue(e.getMessage().contains("Expected value of type: ARRAY; but found type: Varchar/Char; Document slice is : \"soccer\"")) + } List<List<String>> tables6N = sql """show tables""" boolean notContainHide = true @@ -282,6 +295,12 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql_7_23 """select * from test1 where level = 'debug'""" order_qt_sql_7_24 """select * from test1 where esquery(c_float, '{"match":{"c_float":1.1}}')""" order_qt_sql_7_25 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;""" + try { + sql """select * from composite_type_array;""" + fail("Should not reach here") + } catch (Exception e) { + assertTrue(e.getMessage().contains("Expected value of type: ARRAY; but found type: Varchar/Char; Document slice is : \"soccer\"")) + } List<List<String>> tables7N = sql """show tables""" boolean notContainHide7 = true @@ -328,6 +347,12 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql_8_21 """select * from test1 where level = 'debug'""" order_qt_sql_8_22 """select * from test1 where esquery(c_ip, '{"match":{"c_ip":"192.168.0.1"}}')""" order_qt_sql_8_23 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;""" + try { + sql """select * from composite_type_array;""" + fail("Should not reach here") + } catch (Exception e) { + assertTrue(e.getMessage().contains("Expected value of type: ARRAY; but found type: Varchar/Char; Document slice is : \"soccer\"")) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org