This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new d2e5c7045c9 [feature](functions) Impl func xpath_string (#49262) d2e5c7045c9 is described below commit d2e5c7045c9c2622fa06fd6cf1b0571d2102b85d Author: Socrates <suyit...@selectdb.com> AuthorDate: Wed Apr 2 21:05:59 2025 +0800 [feature](functions) Impl func xpath_string (#49262) ### What problem does this PR solve? ### Release note Add xpath_string Function #### Syntax ```sql VARCHAR xpath_string(VARCHAR xml_string, VARCHAR xpath_expr) ``` #### Arguments - `xml_string`: A string containing valid XML content - `xpath_expr`: A valid XPath expression string #### Return Value - Returns VARCHAR type - Returns NULL if either argument is NULL - Returns empty string if XPath doesn't match any nodes #### Examples ```sql -- Basic node value extraction SELECT xpath_string('<a>123</a>', '/a'); -- Returns: '123' -- Nested element extraction SELECT xpath_string('<a><b>123</b></a>', '/a/b'); -- Returns: '123' -- Using attributes SELECT xpath_string('<a><b id="1">123</b></a>', '//b[@id="1"]'); -- Returns: '123' -- Using position predicates SELECT xpath_string('<a><b>1</b><b>2</b></a>', '/a/b[2]'); -- Returns: '2' -- Handling CDATA and comments SELECT xpath_string('<a><![CDATA[123]]></a>', '/a'); -- Returns: '123' SELECT xpath_string('<a><!-- comment -->123</a>', '/a'); -- Returns: '123' ``` --- be/cmake/thirdparty.cmake | 3 + be/src/vec/functions/function_string.cpp | 1 + be/src/vec/functions/function_string.h | 136 +++++++++++++++++++ be/test/vec/function/function_string_test.cpp | 51 +++++++ .../doris/catalog/BuiltinScalarFunctions.java | 2 + .../expressions/functions/scalar/XpathString.java | 72 ++++++++++ .../expressions/visitor/ScalarFunctionVisitor.java | 5 + .../string_functions/test_xpath_string.out | Bin 0 -> 1850 bytes .../string_functions/test_xpath_string.groovy | 149 +++++++++++++++++++++ 9 files changed, 419 insertions(+) diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake index 
1250e8ab1f5..b3d9202d5b0 100644 --- a/be/cmake/thirdparty.cmake +++ b/be/cmake/thirdparty.cmake @@ -176,6 +176,9 @@ add_thirdparty(icuuc LIB64) add_thirdparty(icui18n LIB64) add_thirdparty(icudata LIB64) + +add_thirdparty(pugixml LIB64) + if (BUILD_FAISS) add_thirdparty(openblas LIB64) add_thirdparty(faiss LIB64) diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 1d292b038d3..ca4b20c8c76 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -1281,6 +1281,7 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_function<FunctionOverlay>(); factory.register_function<FunctionStrcmp>(); factory.register_function<FunctionNgramSearch>(); + factory.register_function<FunctionXPathString>(); factory.register_alias(FunctionLeft::name, "strleft"); factory.register_alias(FunctionRight::name, "strright"); diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index d2ae6b6f9e2..fb18848c5b6 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -17,6 +17,7 @@ #pragma once +#include <glog/logging.h> #include <sys/types.h> #include <algorithm> @@ -83,6 +84,7 @@ #include <string_view> #include "exprs/math_functions.h" +#include "pugixml.hpp" #include "udf/udf.h" #include "util/md5.h" #include "util/simd/vstring_function.h" @@ -4588,4 +4590,138 @@ private: } }; +/// xpath_string(xml, xpath) -> String +/// Returns the text content of the first node that matches the XPath expression. +/// Returns NULL if either xml or xpath is NULL or an empty string. +/// Returns empty string if the XPath expression matches no nodes. +/// The text content includes the node and all its descendants. 
+/// Example: +/// xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[1]') = 'b1' +/// xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[2]') = 'b2' +/// xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/c') = '' +/// xpath_string('invalid xml', '/a/b[1]') fails with an InvalidArgument error (XML parse failure) +/// xpath_string(NULL, '/a/b[1]') = NULL +/// xpath_string('<a><b>b1</b><b>b2</b></a>', NULL) = NULL +class FunctionXPathString : public IFunction { +public: + static constexpr auto name = "xpath_string"; + static FunctionPtr create() { return std::make_shared<FunctionXPathString>(); } + String get_name() const override { return name; } + size_t get_number_of_arguments() const override { return 2; } + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeString>()); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + uint32_t result, size_t input_rows_count) const override { + CHECK_EQ(arguments.size(), 2); + auto col_res = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()); + const auto& [left_col, left_const] = + unpack_if_const(block.get_by_position(arguments[0]).column); + const auto& [right_col, right_const] = + unpack_if_const(block.get_by_position(arguments[1]).column); + const auto& xml_col = *assert_cast<const ColumnString*>(left_col.get()); + const auto& xpath_col = *assert_cast<const ColumnString*>(right_col.get()); + + Status status; + if (left_const && right_const) { + status = execute_vector<true, true>(input_rows_count, xml_col, xpath_col, *col_res); + } else if (left_const) { + status = execute_vector<true, false>(input_rows_count, xml_col, xpath_col, *col_res); + } else if (right_const) { + status = execute_vector<false, true>(input_rows_count, xml_col, xpath_col, *col_res); + } else { + status = execute_vector<false, false>(input_rows_count, xml_col, xpath_col, *col_res); + } + if (!status.ok()) { + return status; + } + + block.get_by_position(result).column 
= std::move(col_res); + return Status::OK(); + } + +private: + // Build the text of the node and all its children. + static std::string get_text(const pugi::xml_node& node) { + std::string result; + build_text(node, result); + return result; + } + + static void build_text(const pugi::xml_node& node, std::string& builder) { + if (node.type() == pugi::node_pcdata || node.type() == pugi::node_cdata) { + builder += node.value(); + } + for (pugi::xml_node child : node.children()) { + build_text(child, builder); + } + } + + static Status parse_xml(const StringRef& xml_str, pugi::xml_document& xml_doc) { + pugi::xml_parse_result result = xml_doc.load_buffer(xml_str.data, xml_str.size); + if (!result) { + return Status::InvalidArgument("Function {} failed to parse XML string: {}", name, + result.description()); + } + return Status::OK(); + } + + template <bool left_const, bool right_const> + static Status execute_vector(const size_t input_rows_count, const ColumnString& xml_col, + const ColumnString& xpath_col, ColumnNullable& res_col) { + pugi::xml_document xml_doc; + StringRef xpath_str; + // first check right_const, because we want to check empty input first + if constexpr (right_const) { + xpath_str = xpath_col.get_data_at(0); + if (xpath_str.empty()) { + // should return null if xpath_str is empty + res_col.insert_many_defaults(input_rows_count); + return Status::OK(); + } + } + if constexpr (left_const) { + auto xml_str = xml_col.get_data_at(0); + if (xml_str.empty()) { + // should return null if xml_str is empty + res_col.insert_many_defaults(input_rows_count); + return Status::OK(); + } + RETURN_IF_ERROR(parse_xml(xml_str, xml_doc)); + } + + for (size_t i = 0; i < input_rows_count; ++i) { + if constexpr (!right_const) { + xpath_str = xpath_col.get_data_at(i); + if (xpath_str.empty()) { + // should return null if xpath_str is empty + res_col.insert_default(); + continue; + } + } + if constexpr (!left_const) { + auto xml_str = xml_col.get_data_at(i); + if 
(xml_str.empty()) { + // should return null if xml_str is empty + res_col.insert_default(); + continue; + } + RETURN_IF_ERROR(parse_xml(xml_str, xml_doc)); + } + // NOTE!!!: don't use to_string_view(), because xpath_str maybe not null-terminated + pugi::xpath_node node = xml_doc.select_node(xpath_str.to_string().c_str()); + if (!node) { + // should return empty string if not found + auto empty_str = std::string(""); + res_col.insert_data(empty_str.data(), empty_str.size()); + continue; + } + auto text = get_text(node.node()); + res_col.insert_data(text.data(), text.size()); + } + return Status::OK(); + } +}; + } // namespace doris::vectorized diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp index 45a29fbfc05..28d83a4c1b1 100644 --- a/be/test/vec/function/function_string_test.cpp +++ b/be/test/vec/function/function_string_test.cpp @@ -3374,4 +3374,55 @@ TEST(function_string_test, function_rpad_test) { check_function_all_arg_comb<DataTypeString, true>(func_name, input_types, data_set); } +TEST(function_string_test, function_xpath_string_test) { + std::string func_name = "xpath_string"; + BaseInputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; + + DataSet data_set = { + {{std::string("<a>123</a>"), std::string("/a")}, std::string("123")}, + {{std::string("<a><b>123</b></a>"), std::string("/a/b")}, std::string("123")}, + {{std::string("<a><b>123</b><c>456</c></a>"), std::string("/a/c")}, std::string("456")}, + {{std::string("<a><b>123</b><c>456</c></a>"), std::string("/a/d")}, std::string("")}, + {{std::string("<a><b>123</b><b>456</b></a>"), std::string("/a/b[1]")}, + std::string("123")}, + {{std::string("<a><b>123</b><b>456</b></a>"), std::string("/a/b[2]")}, + std::string("456")}, + {{std::string("<a><b>123</b><b>456</b></a>"), std::string("/a/b[3]")}, std::string("")}, + {{std::string("<a><b attr='val'>123</b></a>"), std::string("/a/b[@attr]")}, + std::string("123")}, + {{std::string("<a><b 
attr='val'>123</b></a>"), std::string("/a/b[@attr='val']")}, + std::string("123")}, + {{std::string("<a><b attr='val'>123</b></a>"), std::string("/a/b[@attr='wrong']")}, + std::string("")}, + {{std::string("<a><!-- comment -->123</a>"), std::string("/a")}, std::string("123")}, + {{std::string("<a><![CDATA[123]]></a>"), std::string("/a")}, std::string("123")}, + {{std::string("<a>123<b>456</b>789</a>"), std::string("/a")}, std::string("123456789")}, + {{std::string("<a> 123 </a>"), std::string("/a")}, std::string(" 123 ")}, + {{std::string("<a></a>"), std::string("/a")}, std::string("")}, + {{std::string("<a/>"), std::string("/a")}, std::string("")}, + {{std::string("<a>123</a>"), std::string("")}, Null()}, + {{std::string(""), std::string("/a")}, Null()}, + {{Null(), std::string("/a")}, Null()}, + {{std::string("<a>123</a>"), Null()}, Null()}, + {{std::string("<book><title>Intro to Hive</title><author>John " + "Doe</author><publisher>Tech Press</publisher></book>"), + std::string("//title/text()")}, + std::string("Intro to Hive")}, + {{std::string("<book><title>Intro to Hive</title><author>John " + "Doe</author><publisher>Tech Press</publisher></book>"), + std::string("//author/text()")}, + std::string("John Doe")}, + {{std::string("<book><title>Intro to Hive</title><author>John " + "Doe</author><publisher>Tech Press</publisher></book>"), + std::string("//publisher/text()")}, + std::string("Tech Press")}, + {{std::string("<book><title>Intro to Hive</title><author>John " + "Doe</author><publisher>Tech Press</publisher></book>"), + std::string("/book")}, + std::string("Intro to HiveJohn DoeTech Press")}, + {{Null(), Null()}, Null()}}; + + check_function_all_arg_comb<DataTypeString, true>(func_name, input_types, data_set); +} + } // namespace doris::vectorized diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java index c7e39967c0b..4248e5ef907 100644 
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java @@ -476,6 +476,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksDiff; import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksSub; import org.apache.doris.nereids.trees.expressions.functions.scalar.WidthBucket; import org.apache.doris.nereids.trees.expressions.functions.scalar.Xor; +import org.apache.doris.nereids.trees.expressions.functions.scalar.XpathString; import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash32; import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash64; import org.apache.doris.nereids.trees.expressions.functions.scalar.Year; @@ -976,6 +977,7 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(XxHash32.class, "xxhash_32"), scalar(XxHash64.class, "xxhash_64"), scalar(Xor.class, "xor"), + scalar(XpathString.class, "xpath_string"), scalar(Year.class, "year"), scalar(YearCeil.class, "year_ceil"), scalar(YearFloor.class, "year_floor"), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XpathString.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XpathString.java new file mode 100644 index 00000000000..734af357d71 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XpathString.java @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.VarcharType; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'xpath_string'. + */ +public class XpathString extends ScalarFunction + implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable { + + public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT) + .args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(StringType.INSTANCE) + .args(StringType.INSTANCE, StringType.INSTANCE) + ); + + /** + * constructor with 2 arguments. + */ + public XpathString(Expression arg0, Expression arg1) { + super("xpath_string", arg0, arg1); + } + + /** + * withChildren. 
+ */ + @Override + public XpathString withChildren(List<Expression> children) { + Preconditions.checkArgument(children.size() == 2); + return new XpathString(children.get(0), children.get(1)); + } + + @Override + public List<FunctionSignature> getSignatures() { + return SIGNATURES; + } + + @Override + public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) { + return visitor.visitXpathString(this, context); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java index 5cfd3b62503..25e9036eddf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java @@ -473,6 +473,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksDiff; import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksSub; import org.apache.doris.nereids.trees.expressions.functions.scalar.WidthBucket; import org.apache.doris.nereids.trees.expressions.functions.scalar.Xor; +import org.apache.doris.nereids.trees.expressions.functions.scalar.XpathString; import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash32; import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash64; import org.apache.doris.nereids.trees.expressions.functions.scalar.Year; @@ -2334,6 +2335,10 @@ public interface ScalarFunctionVisitor<R, C> { return visitScalarFunction(xor, context); } + default R visitXpathString(XpathString xpathString, C context) { + return visitScalarFunction(xpathString, context); + } + // struct function default R visitCreateStruct(CreateStruct createStruct, C context) { diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_xpath_string.out 
b/regression-test/data/query_p0/sql_functions/string_functions/test_xpath_string.out new file mode 100644 index 00000000000..3451f0737f4 Binary files /dev/null and b/regression-test/data/query_p0/sql_functions/string_functions/test_xpath_string.out differ diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_xpath_string.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_xpath_string.groovy new file mode 100644 index 00000000000..c73f70b26f2 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_xpath_string.groovy @@ -0,0 +1,149 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_xpath_string") { + sql "drop table if exists xpath_string_args;" + sql """ + create table xpath_string_args ( + k0 int, + xml_not_null string not null, + xml_null string null, + xpath_not_null string not null, + xpath_null string null + ) + DISTRIBUTED BY HASH(k0) + PROPERTIES + ( + "replication_num" = "1" + ); + """ + + order_qt_empty_nullable "select xpath_string(xml_null, xpath_null) from xpath_string_args" + order_qt_empty_not_nullable "select xpath_string(xml_not_null, xpath_not_null) from xpath_string_args" + order_qt_empty_partial_nullable "select xpath_string(xml_null, xpath_not_null), xpath_string(xml_not_null, xpath_null) from xpath_string_args" + + sql "insert into xpath_string_args values (1, '<a><b>123</b></a>', null, '/a/b', null)" + + order_qt_all_null "select xpath_string(xml_null, xpath_null) from xpath_string_args" + order_qt_all_not_null "select xpath_string(xml_not_null, xpath_not_null) from xpath_string_args" + order_qt_partial_nullable "select xpath_string(xml_null, xpath_not_null), xpath_string(xml_not_null, xpath_null) from xpath_string_args" + order_qt_nullable_no_null "select xpath_string(xml_null, nullable(xpath_not_null)), xpath_string(nullable(xml_not_null), xpath_null) from xpath_string_args" + + sql "truncate table xpath_string_args" + + sql """ + insert into xpath_string_args values + (2, '<a>123</a>', '<a>456</a>', '/a', '/a'), + (3, '<a><b>123</b><c>456</c></a>', null, '/a/c', '/a/b'), + (4, '<a><b>123</b><c>456</c></a>', '<a><d>789</d></a>', '/a/d', null), + (5, '<a><b>123</b><b>456</b></a>', '<a><b>789</b></a>', '/a/b[1]', '/a/b'), + (6, '<a><b>123</b><b>456</b></a>', null, '/a/b[2]', '/a/b[1]'), + (7, '<a><b attr="val">123</b></a>', '<a><b attr="other">456</b></a>', '/a/b[@attr]', '/a/b[@attr="val"]'), + (8, '<a><!-- comment -->123</a>', '<a>456</a>', '/a', null), + (9, '<a><![CDATA[123]]></a>', null, '/a', '/a'), + (10, '<a>123<b>456</b>789</a>', '<a><b>test</b></a>', '/a', '/a/b'), + (11, '<a> 123 </a>', 
'<a>456</a>', '/a', null), + (12, '<book><title>Intro to Hive</title><author>John Doe</author></book>', + '<book><title>SQL Guide</title></book>', + '//title/text()', + '//author/text()'), + (13, '<root><user id="1"><name>Alice</name><age>25</age></user></root>', + '<root><user id="2"><name>Bob</name></user></root>', + '/root/user[@id="1"]/name', + '/root/user/age'), + (14, '<products><item price="10.99">Book</item><item price="20.99">Pen</item></products>', + null, + '/products/item[@price="20.99"]', + '/products/item[1]'), + (15, '<data><![CDATA[<nested>value</nested>]]></data>', + '<data><plain>text</plain></data>', + '/data', + '//plain/text()'), + (16, '<menu><item>Coffee<price>3.99</price></item><item>Tea<price>2.99</price></item></menu>', + '<menu><item><price>5.99</price></item></menu>', + '//item[price="2.99"]', + '/menu/item[1]/price'), + (17, '<doc><section id="1">First</section><section id="2">Second</section></doc>', + null, + '/doc/section[@id="2"]', + '/doc/section[1]'), + (18, '<list><elem pos="1">A</elem><elem pos="2">B</elem><elem pos="3">C</elem></list>', + '<list><elem>X</elem></list>', + '/list/elem[@pos="2"]', + '/list/elem[last()]'), + (19, '<nested><a><b><c>Deep</c></b></a></nested>', + '<nested><x><y>Shallow</y></x></nested>', + '//c', + '/nested/x/y'), + (20, '<mixed>Text<b>Bold</b>Normal<i>Italic</i>End</mixed>', + '<mixed><b>Only Bold</b></mixed>', + '/mixed', + '//b/text()'), + (21, '<empty></empty>', + '<empty/>', + '/empty', + '/empty/text()') + """ + + order_qt_all_null "select xpath_string(xml_null, xpath_null) from xpath_string_args" + order_qt_all_not_null "select xpath_string(xml_not_null, xpath_not_null) from xpath_string_args" + order_qt_partial_nullable "select xpath_string(xml_null, xpath_not_null), xpath_string(xml_not_null, xpath_null) from xpath_string_args" + order_qt_nullable_no_null "select xpath_string(xml_null, nullable(xpath_not_null)), xpath_string(nullable(xml_not_null), xpath_null) from xpath_string_args" + + /// 
consts. most by BE-UT + order_qt_const_nullable "select xpath_string(xml_null, NULL), xpath_string(NULL, xpath_null) from xpath_string_args" + order_qt_const_not_nullable "select xpath_string(xml_not_null, '/a/b'), xpath_string('<a><b>123</b></a>', xpath_not_null) from xpath_string_args" + order_qt_const_partial_nullable "select xpath_string(xml_null, nullable('/a/b')), xpath_string(xml_not_null, nullable(xpath_null)) from xpath_string_args" + order_qt_const_nullable_no_null "select xpath_string(nullable(xml_not_null), nullable('/a/b')), xpath_string(nullable('<a><b>123</b></a>'), nullable(xpath_not_null)) from xpath_string_args" + + order_qt_1 "select xpath_string('', '')" + order_qt_2 "select xpath_string(NULL, NULL)" + order_qt_3 "select xpath_string(NULL, '/a/b')" + order_qt_4 "select xpath_string('<a><b>123</b></a>', NULL)" + order_qt_5 "select xpath_string('<a><b>123</b></a>', '/a/b')" + order_qt_6 "select xpath_string('<a>123</a>', '/a')" + order_qt_7 "select xpath_string('<a><b>123</b><c>456</c></a>', '/a/c')" + order_qt_8 "select xpath_string('<a><b>123</b><b>456</b></a>', '/a/b[1]')" + order_qt_9 "select xpath_string('<a><b attr=\"val\">123</b></a>', '/a/b[@attr]')" + order_qt_10 "select xpath_string('<a><!-- comment -->123</a>', '/a')" + order_qt_11 "select xpath_string('<a><![CDATA[123]]></a>', '/a')" + order_qt_12 "select xpath_string('<book><title>Intro to Hive</title></book>', '//title/text()')" + order_qt_13 "select xpath_string(nullable('<a><b>123</b></a>'), nullable('/a/b'))" + order_qt_14 "select xpath_string('<a><b>123</b></a>', nullable('/a/b'))" + order_qt_15 "select xpath_string(nullable('<a><b>123</b></a>'), '/a/b')" + order_qt_16 "select xpath_string('<root><user id=\"1\"><name>Alice</name></user></root>', '/root/user[@id=\"1\"]/name')" + order_qt_17 "select xpath_string('<products><item price=\"10.99\">Book</item></products>', '/products/item[@price=\"10.99\"]')" + order_qt_18 "select 
xpath_string('<menu><item><price>3.99</price></item></menu>', '//item/price/text()')" + order_qt_19 "select xpath_string('<data><a>1</a><a>2</a><a>3</a></data>', '/data/a[last()]')" + order_qt_20 "select xpath_string('<nested><a><b><c>Deep</c></b></a></nested>', '//c/text()')" + order_qt_21 "select xpath_string('<mixed>Text<b>Bold</b>Normal</mixed>', '/mixed/text()')" + order_qt_22 "select xpath_string('<doc><item pos=\"1\">First</item></doc>', '/doc/item[@pos=\"1\"]/text()')" + order_qt_23 "select xpath_string('<test><a>x</a><b>y</b><c>z</c></test>', '/test/*[2]')" + order_qt_24 "select xpath_string('<data><![CDATA[<nested>value</nested>]]></data>', '/data')" + order_qt_25 "select xpath_string('<root><elem><!-- comment -->value</elem></root>', '/root/elem')" + order_qt_26 "select xpath_string('<doc><section><title>Test</title><para>Text</para></section></doc>', '/doc/section[title=\"Test\"]/para')" + order_qt_27 "select xpath_string('<list><item val=\"1\"/><item val=\"2\"/></list>', '/list/item[@val=\"2\"]')" + order_qt_28 "select xpath_string('<data><group><name>A</name><value>1</value></group></data>', '/data/group[name=\"A\"]/value')" + order_qt_29 "select xpath_string('<root><a><b>1</b></a><a><b>2</b></a></root>', '//a[b=\"2\"]/b')" + order_qt_30 "select xpath_string('<doc><p class=\"main\">Content</p></doc>', '//p[@class=\"main\"]/text()')" + + /// error cases: + test { + sql """ select xpath_string('wrong xml', '//a/c') """ + exception "Function xpath_string failed to parse XML string: No document element found" + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org