github-actions[bot] commented on code in PR #63907:
URL: https://github.com/apache/doris/pull/63907#discussion_r3326622751
##########
be/src/udf/python/python_server.py:
##########
@@ -563,9 +594,14 @@ def _cast_arrow_to_vector(arrow_array: pa.Array, vec_type:
VectorType):
Convert a pa.Array to an instance of the specified VectorType.
"""
if vec_type == VectorType.LIST:
- return arrow_array.to_pylist()
+ return [
+ convert_arrow_value_to_python(value, arrow_array.type)
+ for value in arrow_array.to_pylist()
+ ]
elif vec_type == VectorType.PANDAS_SERIES:
- return arrow_array.to_pandas()
+ return arrow_array.to_pandas().apply(
+ lambda value: convert_arrow_value_to_python(value,
arrow_array.type)
Review Comment:
This now runs a Python-level recursive conversion for every element of every
vectorized UDF argument, even when `arrow_array.type` is a primitive type that
cannot contain a nested MAP. For `pd.Series` this replaces the previous
`arrow_array.to_pandas()` fast path with `.apply(lambda ...)` over the whole
column, so existing vectorized Python UDFs on primitive columns regress in
performance even though they do not need this fix. Please gate the recursive
conversion to Arrow types that can actually contain nested values needing
normalization (for example, by checking `pa.types.is_nested(arrow_array.type)`
before converting), and keep the old direct `to_pandas()`/`to_pylist()` path
for primitive/non-nested inputs.
##########
regression-test/suites/pythonudf_p0/test_pythonudf_nested_complex_type.groovy:
##########
@@ -0,0 +1,445 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_nested_complex_type") {
+ def runtime_version = getPythonUdfRuntimeVersion()
+
+ try {
+ sql """ DROP TABLE IF EXISTS test_pythonudf_nested_complex_type; """
+ sql """
+ CREATE TABLE test_pythonudf_nested_complex_type (
+ id INT,
+ array_map ARRAY<MAP<STRING, INT>>,
+ map_array_map MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+ struct_nested STRUCT<
+ label: STRING,
+ maps: ARRAY<MAP<STRING, INT>>,
+ attrs: MAP<STRING, ARRAY<INT>>
+ >,
+ map_struct_nested MAP<STRING, STRUCT<
+ tag: STRING,
+ metrics: ARRAY<MAP<STRING, INT>>
+ >>
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO test_pythonudf_nested_complex_type VALUES
+ (
+ 1,
+ [{'a': 1, 'b': 2}, {'c': 3}],
+ {'left': [{'x': 10}], 'right': [{'y': 20}, {'z': 30}]},
+ {'row1', [{'s': 7}, {'t': 8}], {'nums': [1, 2], 'empty': []}},
+ {'first': {'tagA', [{'m': 1}, {'n': 2}]}, 'second': {'tagB',
[]}}
+ ),
+ (
+ 2,
+ [],
+ {'empty': []},
+ {'row2', [], {'none': NULL}},
+ {'empty': {'tagEmpty', []}}
+ ),
+ (
+ 3,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ );
+ """
+
+ sql """
+ DROP FUNCTION IF EXISTS py_nested_complex_scalar(
+ ARRAY<MAP<STRING, INT>>,
+ MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+ STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs:
MAP<STRING, ARRAY<INT>>>,
+ MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING,
INT>>>>
+ );
+ """
+ sql """
+ CREATE FUNCTION py_nested_complex_scalar(
+ ARRAY<MAP<STRING, INT>>,
+ MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+ STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs:
MAP<STRING, ARRAY<INT>>>,
+ MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING,
INT>>>>
+ )
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+def format_map(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP:' + type(m).__name__
+ return '{' + ','.join(f'{k}:{m[k]}' for k in sorted(m)) + '}'
+
+def format_array_map(arr):
+ if arr is None:
+ return 'NULL'
+ return '[' + ','.join(format_map(item) for item in arr) + ']'
+
+def format_map_array_map(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP_ARRAY_MAP:' + type(m).__name__
+ return '{' + ','.join(f'{k}:{format_array_map(m[k])}' for k in sorted(m))
+ '}'
+
+def format_attrs(attrs):
+ if attrs is None:
+ return 'NULL'
+ if not isinstance(attrs, dict):
+ return 'BAD_ATTRS:' + type(attrs).__name__
+ parts = []
+ for key in sorted(attrs):
+ val = attrs[key]
+ if val is None:
+ parts.append(f'{key}:NULL')
+ else:
+ parts.append(f'{key}:[' + ','.join(str(x) for x in val) + ']')
+ return '{' + ','.join(parts) + '}'
+
+def format_struct(s):
+ if s is None:
+ return 'NULL'
+ if not isinstance(s, dict):
+ return 'BAD_STRUCT:' + type(s).__name__
+ return '(' + str(s.get('label')) + ',' + format_array_map(s.get('maps')) +
',' + format_attrs(s.get('attrs')) + ')'
+
+def format_map_struct_nested(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP_STRUCT:' + type(m).__name__
+ parts = []
+ for key in sorted(m):
+ val = m[key]
+ if val is None:
+ parts.append(f'{key}:NULL')
+ elif not isinstance(val, dict):
+ parts.append(f'{key}:BAD_STRUCT:' + type(val).__name__)
+ else:
+ parts.append(f'{key}:(' + str(val.get('tag')) + ',' +
format_array_map(val.get('metrics')) + ')')
+ return '{' + ','.join(parts) + '}'
+
+def evaluate(array_map, map_array_map, struct_nested, map_struct_nested):
+ return '|'.join([
+ format_array_map(array_map),
+ format_map_array_map(map_array_map),
+ format_struct(struct_nested),
+ format_map_struct_nested(map_struct_nested),
+ ])
+\$\$;
+ """
+
+ qt_scalar_constant_nested_complex """
+ SELECT py_nested_complex_scalar(
Review Comment:
This `UNION ALL` query has no `ORDER BY` and uses `qt_` rather than
`order_qt_`, so the three expected rows are not guaranteed to be returned in
the order recorded in the `.out` file. Doris regression tests require
deterministic output; please either use
`order_qt_scalar_constant_nested_complex` or add an explicit ordering column
around the union.
##########
regression-test/suites/pythonudf_p0/test_pythonudf_nested_complex_type.groovy:
##########
@@ -0,0 +1,445 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_nested_complex_type") {
+ def runtime_version = getPythonUdfRuntimeVersion()
+
+ try {
+ sql """ DROP TABLE IF EXISTS test_pythonudf_nested_complex_type; """
+ sql """
+ CREATE TABLE test_pythonudf_nested_complex_type (
+ id INT,
+ array_map ARRAY<MAP<STRING, INT>>,
+ map_array_map MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+ struct_nested STRUCT<
+ label: STRING,
+ maps: ARRAY<MAP<STRING, INT>>,
+ attrs: MAP<STRING, ARRAY<INT>>
+ >,
+ map_struct_nested MAP<STRING, STRUCT<
+ tag: STRING,
+ metrics: ARRAY<MAP<STRING, INT>>
+ >>
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO test_pythonudf_nested_complex_type VALUES
+ (
+ 1,
+ [{'a': 1, 'b': 2}, {'c': 3}],
+ {'left': [{'x': 10}], 'right': [{'y': 20}, {'z': 30}]},
+ {'row1', [{'s': 7}, {'t': 8}], {'nums': [1, 2], 'empty': []}},
+ {'first': {'tagA', [{'m': 1}, {'n': 2}]}, 'second': {'tagB',
[]}}
+ ),
+ (
+ 2,
+ [],
+ {'empty': []},
+ {'row2', [], {'none': NULL}},
+ {'empty': {'tagEmpty', []}}
+ ),
+ (
+ 3,
+ NULL,
+ NULL,
+ NULL,
+ NULL
+ );
+ """
+
+ sql """
+ DROP FUNCTION IF EXISTS py_nested_complex_scalar(
+ ARRAY<MAP<STRING, INT>>,
+ MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+ STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs:
MAP<STRING, ARRAY<INT>>>,
+ MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING,
INT>>>>
+ );
+ """
+ sql """
+ CREATE FUNCTION py_nested_complex_scalar(
+ ARRAY<MAP<STRING, INT>>,
+ MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+ STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs:
MAP<STRING, ARRAY<INT>>>,
+ MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING,
INT>>>>
+ )
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+def format_map(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP:' + type(m).__name__
+ return '{' + ','.join(f'{k}:{m[k]}' for k in sorted(m)) + '}'
+
+def format_array_map(arr):
+ if arr is None:
+ return 'NULL'
+ return '[' + ','.join(format_map(item) for item in arr) + ']'
+
+def format_map_array_map(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP_ARRAY_MAP:' + type(m).__name__
+ return '{' + ','.join(f'{k}:{format_array_map(m[k])}' for k in sorted(m))
+ '}'
+
+def format_attrs(attrs):
+ if attrs is None:
+ return 'NULL'
+ if not isinstance(attrs, dict):
+ return 'BAD_ATTRS:' + type(attrs).__name__
+ parts = []
+ for key in sorted(attrs):
+ val = attrs[key]
+ if val is None:
+ parts.append(f'{key}:NULL')
+ else:
+ parts.append(f'{key}:[' + ','.join(str(x) for x in val) + ']')
+ return '{' + ','.join(parts) + '}'
+
+def format_struct(s):
+ if s is None:
+ return 'NULL'
+ if not isinstance(s, dict):
+ return 'BAD_STRUCT:' + type(s).__name__
+ return '(' + str(s.get('label')) + ',' + format_array_map(s.get('maps')) +
',' + format_attrs(s.get('attrs')) + ')'
+
+def format_map_struct_nested(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP_STRUCT:' + type(m).__name__
+ parts = []
+ for key in sorted(m):
+ val = m[key]
+ if val is None:
+ parts.append(f'{key}:NULL')
+ elif not isinstance(val, dict):
+ parts.append(f'{key}:BAD_STRUCT:' + type(val).__name__)
+ else:
+ parts.append(f'{key}:(' + str(val.get('tag')) + ',' +
format_array_map(val.get('metrics')) + ')')
+ return '{' + ','.join(parts) + '}'
+
+def evaluate(array_map, map_array_map, struct_nested, map_struct_nested):
+ return '|'.join([
+ format_array_map(array_map),
+ format_map_array_map(map_array_map),
+ format_struct(struct_nested),
+ format_map_struct_nested(map_struct_nested),
+ ])
+\$\$;
+ """
+
+ qt_scalar_constant_nested_complex """
+ SELECT py_nested_complex_scalar(
+ CAST([{'a': 1, 'b': 2}, {'c': 3}] AS ARRAY<MAP<STRING, INT>>),
+ CAST({'left': [{'x': 10}], 'right': [{'y': 20}, {'z': 30}]} AS
MAP<STRING, ARRAY<MAP<STRING, INT>>>),
+ CAST({'const', [{'s': 7}], {'nums': [1, 2], 'empty': []}} AS
STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs: MAP<STRING,
ARRAY<INT>>>),
+ CAST({'const_key': {'constTag', [{'cm': 11}]}} AS MAP<STRING,
STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING, INT>>>>)
+ )
+ UNION ALL
+ SELECT py_nested_complex_scalar(
+ CAST([] AS ARRAY<MAP<STRING, INT>>),
+ CAST({'empty': []} AS MAP<STRING, ARRAY<MAP<STRING, INT>>>),
+ CAST({'empty', [], {'none': NULL}} AS STRUCT<label: STRING,
maps: ARRAY<MAP<STRING, INT>>, attrs: MAP<STRING, ARRAY<INT>>>),
+ CAST({'empty': {'emptyTag', []}} AS MAP<STRING, STRUCT<tag:
STRING, metrics: ARRAY<MAP<STRING, INT>>>>)
+ )
+ UNION ALL
+ SELECT py_nested_complex_scalar(
+ CAST(NULL AS ARRAY<MAP<STRING, INT>>),
+ CAST(NULL AS MAP<STRING, ARRAY<MAP<STRING, INT>>>),
+ CAST(NULL AS STRUCT<label: STRING, maps: ARRAY<MAP<STRING,
INT>>, attrs: MAP<STRING, ARRAY<INT>>>),
+ CAST(NULL AS MAP<STRING, STRUCT<tag: STRING, metrics:
ARRAY<MAP<STRING, INT>>>>)
+ );
+ """
+
+ sql """
+ DROP FUNCTION IF EXISTS py_nested_complex_vector_list(
+ ARRAY<MAP<STRING, INT>>,
+ MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+ STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs:
MAP<STRING, ARRAY<INT>>>,
+ MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING,
INT>>>>
+ );
+ """
+ sql """
+ CREATE FUNCTION py_nested_complex_vector_list(
+ ARRAY<MAP<STRING, INT>>,
+ MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+ STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs:
MAP<STRING, ARRAY<INT>>>,
+ MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING,
INT>>>>
+ )
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+def format_map(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP:' + type(m).__name__
+ return '{' + ','.join(f'{k}:{m[k]}' for k in sorted(m)) + '}'
+
+def format_array_map(arr):
+ if arr is None:
+ return 'NULL'
+ return '[' + ','.join(format_map(item) for item in arr) + ']'
+
+def format_map_array_map(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP_ARRAY_MAP:' + type(m).__name__
+ return '{' + ','.join(f'{k}:{format_array_map(m[k])}' for k in sorted(m))
+ '}'
+
+def format_attrs(attrs):
+ if attrs is None:
+ return 'NULL'
+ if not isinstance(attrs, dict):
+ return 'BAD_ATTRS:' + type(attrs).__name__
+ parts = []
+ for key in sorted(attrs):
+ val = attrs[key]
+ if val is None:
+ parts.append(f'{key}:NULL')
+ else:
+ parts.append(f'{key}:[' + ','.join(str(x) for x in val) + ']')
+ return '{' + ','.join(parts) + '}'
+
+def format_struct(s):
+ if s is None:
+ return 'NULL'
+ if not isinstance(s, dict):
+ return 'BAD_STRUCT:' + type(s).__name__
+ return '(' + str(s.get('label')) + ',' + format_array_map(s.get('maps')) +
',' + format_attrs(s.get('attrs')) + ')'
+
+def format_map_struct_nested(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP_STRUCT:' + type(m).__name__
+ parts = []
+ for key in sorted(m):
+ val = m[key]
+ if val is None:
+ parts.append(f'{key}:NULL')
+ elif not isinstance(val, dict):
+ parts.append(f'{key}:BAD_STRUCT:' + type(val).__name__)
+ else:
+ parts.append(f'{key}:(' + str(val.get('tag')) + ',' +
format_array_map(val.get('metrics')) + ')')
+ return '{' + ','.join(parts) + '}'
+
+def evaluate(array_maps: list, map_array_maps: list, struct_nesteds: list,
map_struct_nesteds: list):
+ result = []
+ for array_map, map_array_map, struct_nested, map_struct_nested in
zip(array_maps, map_array_maps, struct_nesteds, map_struct_nesteds):
+ result.append('|'.join([
+ format_array_map(array_map),
+ format_map_array_map(map_array_map),
+ format_struct(struct_nested),
+ format_map_struct_nested(map_struct_nested),
+ ]))
+ return result
+\$\$;
+ """
+
+ qt_vector_list_nested_complex """
+ SELECT py_nested_complex_vector_list(array_map, map_array_map,
struct_nested, map_struct_nested) AS result
+ FROM test_pythonudf_nested_complex_type
+ ORDER BY id;
+ """
+
+ sql """
+ DROP FUNCTION IF EXISTS py_nested_complex_vector_series(
+ ARRAY<MAP<STRING, INT>>,
+ MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+ STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs:
MAP<STRING, ARRAY<INT>>>,
+ MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING,
INT>>>>
+ );
+ """
+ sql """
+ CREATE FUNCTION py_nested_complex_vector_series(
+ ARRAY<MAP<STRING, INT>>,
+ MAP<STRING, ARRAY<MAP<STRING, INT>>>,
+ STRUCT<label: STRING, maps: ARRAY<MAP<STRING, INT>>, attrs:
MAP<STRING, ARRAY<INT>>>,
+ MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING,
INT>>>>
+ )
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def format_map(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP:' + type(m).__name__
+ return '{' + ','.join(f'{k}:{m[k]}' for k in sorted(m)) + '}'
+
+def format_array_map(arr):
+ if arr is None:
+ return 'NULL'
+ return '[' + ','.join(format_map(item) for item in arr) + ']'
+
+def format_map_array_map(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP_ARRAY_MAP:' + type(m).__name__
+ return '{' + ','.join(f'{k}:{format_array_map(m[k])}' for k in sorted(m))
+ '}'
+
+def format_attrs(attrs):
+ if attrs is None:
+ return 'NULL'
+ if not isinstance(attrs, dict):
+ return 'BAD_ATTRS:' + type(attrs).__name__
+ parts = []
+ for key in sorted(attrs):
+ val = attrs[key]
+ if val is None:
+ parts.append(f'{key}:NULL')
+ else:
+ parts.append(f'{key}:[' + ','.join(str(x) for x in val) + ']')
+ return '{' + ','.join(parts) + '}'
+
+def format_struct(s):
+ if s is None:
+ return 'NULL'
+ if not isinstance(s, dict):
+ return 'BAD_STRUCT:' + type(s).__name__
+ return '(' + str(s.get('label')) + ',' + format_array_map(s.get('maps')) +
',' + format_attrs(s.get('attrs')) + ')'
+
+def format_map_struct_nested(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP_STRUCT:' + type(m).__name__
+ parts = []
+ for key in sorted(m):
+ val = m[key]
+ if val is None:
+ parts.append(f'{key}:NULL')
+ elif not isinstance(val, dict):
+ parts.append(f'{key}:BAD_STRUCT:' + type(val).__name__)
+ else:
+ parts.append(f'{key}:(' + str(val.get('tag')) + ',' +
format_array_map(val.get('metrics')) + ')')
+ return '{' + ','.join(parts) + '}'
+
+def evaluate(array_maps: pd.Series, map_array_maps: pd.Series, struct_nesteds:
pd.Series, map_struct_nesteds: pd.Series) -> pd.Series:
+ if not all(isinstance(arg, pd.Series) for arg in [array_maps,
map_array_maps, struct_nesteds, map_struct_nesteds]):
+ return pd.Series(['BAD_VECTOR_ARGS'] * len(array_maps))
+ result = []
+ for array_map, map_array_map, struct_nested, map_struct_nested in
zip(array_maps, map_array_maps, struct_nesteds, map_struct_nesteds):
+ result.append('|'.join([
+ format_array_map(array_map),
+ format_map_array_map(map_array_map),
+ format_struct(struct_nested),
+ format_map_struct_nested(map_struct_nested),
+ ]))
+ return pd.Series(result)
+\$\$;
+ """
+
+ qt_vector_series_nested_complex """
+ SELECT py_nested_complex_vector_series(array_map, map_array_map,
struct_nested, map_struct_nested) AS result
+ FROM test_pythonudf_nested_complex_type
+ ORDER BY id;
+ """
+
+ sql """
+ DROP FUNCTION IF EXISTS py_nested_complex_vector_mixed(
+ INT,
+ MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING,
INT>>>>
+ );
+ """
+ sql """
+ CREATE FUNCTION py_nested_complex_vector_mixed(
+ INT,
+ MAP<STRING, STRUCT<tag: STRING, metrics: ARRAY<MAP<STRING,
INT>>>>
+ )
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def format_map(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MAP:' + type(m).__name__
+ return '{' + ','.join(f'{k}:{m[k]}' for k in sorted(m)) + '}'
+
+def format_array_map(arr):
+ if arr is None:
+ return 'NULL'
+ return '[' + ','.join(format_map(item) for item in arr) + ']'
+
+def format_map_struct_nested(m):
+ if m is None:
+ return 'NULL'
+ if not isinstance(m, dict):
+ return 'BAD_MIXED_SCALAR:' + type(m).__name__
+ parts = []
+ for key in sorted(m):
+ val = m[key]
+ if val is None:
+ parts.append(f'{key}:NULL')
+ elif not isinstance(val, dict):
+ parts.append(f'{key}:BAD_STRUCT:' + type(val).__name__)
+ else:
+ parts.append(f'{key}:(' + str(val.get('tag')) + ',' +
format_array_map(val.get('metrics')) + ')')
+ return '{' + ','.join(parts) + '}'
+
+def evaluate(ids: pd.Series, mixed_map_struct_nested) -> pd.Series:
+ if not isinstance(ids, pd.Series):
+ return pd.Series(['BAD_VECTOR_ARG'])
+ formatted = format_map_struct_nested(mixed_map_struct_nested)
+ return pd.Series([str(id_value) + '|' + formatted for id_value in ids])
+\$\$;
+ """
+
+ qt_vector_mixed_scalar_nested_complex """
+ SELECT py_nested_complex_vector_mixed(id, map_struct_nested) AS
result
+ FROM test_pythonudf_nested_complex_type
+ ORDER BY id;
+ """
+ } finally {
+ try_sql("DROP TABLE IF EXISTS test_pythonudf_nested_complex_type;")
+ }
Review Comment:
The regression-test standard for Doris is to drop tables before use, not
after, so the environment remains available for debugging when a test fails.
This test already drops the table at the beginning; please remove the `finally`
cleanup that drops it again after execution.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]