eldenmoon commented on code in PR #47890: URL: https://github.com/apache/doris/pull/47890#discussion_r1974713808
########## regression-test/suites/variant_p0/insert_into_select.groovy: ########## @@ -55,4 +55,189 @@ suite("regression_test_variant_insert_into_select", "variant_type"){ qt_sql """insert into ${table_name}_var select * from ${table_name}_str""" qt_sql """insert into ${table_name}_var select * from ${table_name}_var""" qt_sql """select v["a"], v["b"], v["c"], v['d'], v['e'], v['f'] from insert_into_select_var order by k limit 215""" -} \ No newline at end of file + + // test struct/map/array/json type into variant + sql """ truncate table ${table_name}_var""" + sql "DROP TABLE IF EXISTS ${table_name}_complex" + sql """ + CREATE TABLE IF NOT EXISTS ${table_name}_complex ( + k bigint, + a array<string>, + b map<int, string>, + c struct<a:int, b:string, c:ipv4, d:decimal>, + d json, + a_s array<struct<a:int, b:string, d:datetime>>, + b_s map<string, struct<a:int, b:string, d:datetime>>, + c_s struct<a:array<int>, b:map<int, string>, c:struct<a:int, b:string, c:ipv4, d:decimal>> + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 3 + properties("replication_num" = "1"); + """ + sql """insert into ${table_name}_complex values (1, ['a', 'b', 'c'], map(1, 'a', 2, 'b'), named_struct('a', 1, 'b', 'b', 'c', '192.0.0.1', 'd', 1.0), '{"a": 1}', + array(named_struct('a', 1, 'b', 'b', 'd', '2021-01-01 00:00:00')), map('a', named_struct('a', 1, 'b', 'b', 'd', '2021-01-01 00:00:00')), + named_struct('a', array(1, 2), 'b', map(1, 'a', 2, 'b'), 'c', named_struct('a', 1, 'b', 'b', 'c', '192.0.0.1', 'd', 1.0)) )""" + sql """ INSERT INTO ${table_name}_complex VALUES + (2, + ['a1','b1','c1'], + map(1, 'a1', 2, 'b1'), + named_struct('a', 1, 'b', 'b1', 'c', '192.168.1.1', 'd', 1.0), + '{"a": 1}', + array(named_struct('a', 1, 'b', 'b1', 'd', '2021-01-01 00:00:00')), + map('a', named_struct('a', 1, 'b', 'b1', 'd', '2021-01-01 00:00:00')), + named_struct('a', array(1, 2), 'b', map(1, 'a1', 2, 'b1'), 'c', named_struct('a', 1, 'b', 'b1', 'c', '192.168.1.1', 'd', 1.0)) + )""" + sql """ INSERT INTO ${table_name}_complex VALUES (3, + ['a1', 'b1', 'c1', 'd1', 'e1'], + map(1, 'a1', 2, 'b1', 3, 'c1', 4, 'd1'), + named_struct('a', 1, 'b', 'b1', 'c', '192.168.1.1', 'd', 1.0), + '{"x": 1, "y": "test1", "z": [1,2,3]}', + array(named_struct('a', 1, 'b', 'b1', 'd', '2021-01-01 00:00:00')), + map('a', named_struct('a', 1, 'b', 'b1', 'd', '2021-01-01 00:00:00')), + named_struct('a', [1, 2], 'b', map(1, 'a1', 2, 'b1'), 'c', named_struct('a', 1, 'b', 'b1', 'c', '192.168.1.1', 'd', 1.0)) + ), + (4, + [], + {}, + named_struct('a', 2, 'b', 'b2', 'c', '192.168.1.2', 'd', 2.0), + NULL, + [], + {}, + named_struct('a', NULL, 'b', NULL, 'c', named_struct('a', 2, 'b', 'b2', 'c', '192.168.1.2', 'd', 2.0)) + ), + (5, + ['x1', 'y1', 'z1'], + map(10, 'x1', 20, 'y1', 30, 'z1'), + named_struct('a', 3, 'b', 'b3', 'c', '192.168.1.3', 'd', 3.0), + '{"data": ["alpha", "beta"], "flag": true}', + array(named_struct('a', 3, 'b', 'b3', 'd', '2021-03-01 00:00:00')), + map('b', named_struct('a', 3, 'b', 'b3', 'd', '2021-03-01 00:00:00')), + named_struct('a', [3, 4, 5], 'b', map(5, 'c3', 6, 'd3'), 'c', named_struct('a', 3, 'b', 'b3', 'c', '192.168.1.3', 'd', 3.0)) + ), + (6, + ['m1', 'n1'], + map(50, 'm1', 60, 'n1'), + named_struct('a', 4, 'b', 'b4', 'c', '192.168.1.4', 'd', 4.0), + '{"nested": {"key": "value"}}', + NULL, + map('c', named_struct('a', 4, 'b', 'b4', 'd', '2021-04-01 00:00:00')), + named_struct('a', [4, 5, 6, 7], 'b', map(1, 'm4', 2, 'n4'), 'c', named_struct('a', 4, 'b', 'b4', 'c', '192.168.1.4', 'd', 4.0)) + ), + (7, + ['p1', 'q1', 'r1', 's1', 't1'], + map(100, 'p1', 200, 'q1', 300, 'r1', 400, 's1', 500, 't1'), + named_struct('a', 5, 'b', 'b5', 'c', '192.168.1.5', 'd', 5.0), + '{"info": "random text", "count": 5}', + array(named_struct('a', 5, 'b', 'b5', 'd', '2021-05-01 00:00:00')), + {}, + named_struct('a', [5], 'b', map(3, 'r5'), 'c', named_struct('a', 5, 'b', 'b5', 'c', '192.168.1.5', 'd', 5.0)) + ); + """ + // select the origin table + qt_sql """select * from ${table_name}_complex order by k""" + + // then insert into select from ${table_name}_complex to ${table_name}_var with key increment by 1 + sql """insert into ${table_name}_var select k, a from ${table_name}_complex""" + sql """insert into ${table_name}_var select k+7, b from ${table_name}_complex""" + sql """insert into ${table_name}_var select k+14, c from ${table_name}_complex""" + sql """insert into ${table_name}_var select k+21, d from ${table_name}_complex""" + sql """insert into ${table_name}_var select k+28, a_s from ${table_name}_complex""" + sql """insert into ${table_name}_var select k+35, b_s from ${table_name}_complex""" + sql """insert into ${table_name}_var select k+42, c_s from ${table_name}_complex""" + qt_sql """select * from ${table_name}_var order by k""" + + def load_json_data = {tn, file_name -> + // load the json data + streamLoad { + table """$tn""" + + // set http request header params + set 'read_json_by_line', 'true' + set 'jsonpaths', '[\"$.v.id\", \"$.v.type\", \"$.v.actor\", \"$.v.repo\", \"$.v.payload\", \"$.v.public\", \"$.v.created_at\"]' + set 'format', 'json' + set 'max_filter_ratio', '0.1' + file file_name // import json file + time 10000 // limit inflight 10s + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + logger.info("Stream load ${file_name} result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + // assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + + def table_name_j = "github_events_j" + sql """DROP TABLE IF EXISTS ${table_name_j}""" + sql """ + CREATE TABLE `${table_name_j}` ( + `id` BIGINT NOT NULL, Review Comment: 1. add schema change case (double write during schema change, like double_write_schema_change) 2. add literal cast like cast(cast('{"a" : 1}' as json) as variant) also, for map, struct 3. add large data set to schema_change_p2 to test stability ########## fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/check/CheckCast.java: ########## @@ -63,6 +63,9 @@ private static boolean check(DataType originalType, DataType targetType) { // variant could cast to primitive types and array return true; } + if ((originalType.isComplexType() || originalType.isJsonType()) && targetType.isVariantType()) { Review Comment: diito ########## fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java: ########## @@ -154,8 +154,8 @@ public CastExpr(Type targetType, Expr e, Void v) { Type from = getActualArgTypes(collectChildReturnTypes())[0]; Type to = getActualType(type); NullableMode nullableMode = TYPE_NULLABLE_MODE.get(Pair.of(from, to)); - // for complex type cast to jsonb we make ret is always nullable - if (from.isComplexType() && type.isJsonbType()) { + // for complex type cast to jsonb|variant we make ret is always nullable + if (from.isComplexType() && (type.isJsonbType() || type.isVariantType())) { Review Comment: what about MAP<INT, STRING> which is not valid json string -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org