This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d2e5c7045c9 [feature](functions) Impl func xpath_string (#49262)
d2e5c7045c9 is described below

commit d2e5c7045c9c2622fa06fd6cf1b0571d2102b85d
Author: Socrates <suyit...@selectdb.com>
AuthorDate: Wed Apr 2 21:05:59 2025 +0800

    [feature](functions) Impl func xpath_string (#49262)
    
    ### What problem does this PR solve?
    ### Release note
    Add xpath_string Function
    #### Syntax
    ```sql
    VARCHAR xpath_string(VARCHAR xml_string, VARCHAR xpath_expr)
    ```
    
    #### Arguments
    - `xml_string`: A string containing valid XML content
    - `xpath_expr`: A valid XPath expression string
    
    #### Return Value
    - Returns VARCHAR type
    - Returns NULL if either argument is NULL or an empty string
    - Returns empty string if XPath doesn't match any nodes
    - Raises an error if `xml_string` cannot be parsed as XML
    
    #### Examples
    ```sql
    -- Basic node value extraction
    SELECT xpath_string('<a>123</a>', '/a');  -- Returns: '123'
    
    -- Nested element extraction
    SELECT xpath_string('<a><b>123</b></a>', '/a/b');  -- Returns: '123'
    
    -- Using attributes
    SELECT xpath_string('<a><b id="1">123</b></a>', '//b[@id="1"]');  -- Returns: '123'
    
    -- Using position predicates
    SELECT xpath_string('<a><b>1</b><b>2</b></a>', '/a/b[2]');  -- Returns: '2'
    
    -- Handling CDATA and comments
    SELECT xpath_string('<a><![CDATA[123]]></a>', '/a');  -- Returns: '123'
    SELECT xpath_string('<a><!-- comment -->123</a>', '/a');  -- Returns: '123'
    ```
---
 be/cmake/thirdparty.cmake                          |   3 +
 be/src/vec/functions/function_string.cpp           |   1 +
 be/src/vec/functions/function_string.h             | 136 +++++++++++++++++++
 be/test/vec/function/function_string_test.cpp      |  51 +++++++
 .../doris/catalog/BuiltinScalarFunctions.java      |   2 +
 .../expressions/functions/scalar/XpathString.java  |  72 ++++++++++
 .../expressions/visitor/ScalarFunctionVisitor.java |   5 +
 .../string_functions/test_xpath_string.out         | Bin 0 -> 1850 bytes
 .../string_functions/test_xpath_string.groovy      | 149 +++++++++++++++++++++
 9 files changed, 419 insertions(+)

diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake
index 1250e8ab1f5..b3d9202d5b0 100644
--- a/be/cmake/thirdparty.cmake
+++ b/be/cmake/thirdparty.cmake
@@ -176,6 +176,9 @@ add_thirdparty(icuuc LIB64)
 add_thirdparty(icui18n LIB64)
 add_thirdparty(icudata LIB64)
 
+
+# pugixml: XML parser and XPath engine used by the xpath_string() string function.
+add_thirdparty(pugixml LIB64)
+
 if (BUILD_FAISS)
     add_thirdparty(openblas LIB64)
     add_thirdparty(faiss LIB64)
diff --git a/be/src/vec/functions/function_string.cpp 
b/be/src/vec/functions/function_string.cpp
index 1d292b038d3..ca4b20c8c76 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -1281,6 +1281,7 @@ void register_function_string(SimpleFunctionFactory& 
factory) {
     factory.register_function<FunctionOverlay>();
     factory.register_function<FunctionStrcmp>();
     factory.register_function<FunctionNgramSearch>();
+    factory.register_function<FunctionXPathString>();
 
     factory.register_alias(FunctionLeft::name, "strleft");
     factory.register_alias(FunctionRight::name, "strright");
diff --git a/be/src/vec/functions/function_string.h 
b/be/src/vec/functions/function_string.h
index d2ae6b6f9e2..fb18848c5b6 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <glog/logging.h>
 #include <sys/types.h>
 
 #include <algorithm>
@@ -83,6 +84,7 @@
 #include <string_view>
 
 #include "exprs/math_functions.h"
+#include "pugixml.hpp"
 #include "udf/udf.h"
 #include "util/md5.h"
 #include "util/simd/vstring_function.h"
@@ -4588,4 +4590,138 @@ private:
     }
 };
 
+/// xpath_string(xml, xpath) -> Nullable(String)
+/// Returns the text content of the first node that matches the XPath expression.
+/// For an element, the text content is the concatenation of every text and CDATA node
+/// below it, in document order. For an attribute, it is the attribute value.
+/// Returns NULL if either xml or xpath is NULL or an empty string.
+/// Returns empty string if the XPath expression matches no nodes.
+/// Fails with InvalidArgument if xml cannot be parsed, or if xpath is not a valid XPath
+/// expression that selects nodes.
+/// Example:
+///   xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[1]') = 'b1'
+///   xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[2]') = 'b2'
+///   xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/c') = ''
+///   xpath_string('<a id="7"/>', '/a/@id') = '7'
+///   xpath_string('', '/a/b[1]') = NULL
+///   xpath_string(NULL, '/a/b[1]') = NULL
+///   xpath_string('<a><b>b1</b><b>b2</b></a>', NULL) = NULL
+///   xpath_string('invalid xml', '/a/b[1]') -> error (InvalidArgument)
+class FunctionXPathString : public IFunction {
+public:
+    static constexpr auto name = "xpath_string";
+    static FunctionPtr create() { return std::make_shared<FunctionXPathString>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 2; }
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return make_nullable(std::make_shared<DataTypeString>());
+    }
+
+    /// Evaluates xpath_string over the two argument columns and stores a nullable string
+    /// column at position `result`. Any error reported by execute_vector is returned as-is
+    /// and leaves the block untouched.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        uint32_t result, size_t input_rows_count) const override {
+        CHECK_EQ(arguments.size(), 2);
+        auto col_res = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create());
+        const auto& [left_col, left_const] =
+                unpack_if_const(block.get_by_position(arguments[0]).column);
+        const auto& [right_col, right_const] =
+                unpack_if_const(block.get_by_position(arguments[1]).column);
+        const auto& xml_col = *assert_cast<const ColumnString*>(left_col.get());
+        const auto& xpath_col = *assert_cast<const ColumnString*>(right_col.get());
+
+        // Pick the instantiation matching the constness of each argument, so a constant
+        // document is parsed only once and a constant expression is compiled only once.
+        Status status;
+        if (left_const && right_const) {
+            status = execute_vector<true, true>(input_rows_count, xml_col, xpath_col, *col_res);
+        } else if (left_const) {
+            status = execute_vector<true, false>(input_rows_count, xml_col, xpath_col, *col_res);
+        } else if (right_const) {
+            status = execute_vector<false, true>(input_rows_count, xml_col, xpath_col, *col_res);
+        } else {
+            status = execute_vector<false, false>(input_rows_count, xml_col, xpath_col, *col_res);
+        }
+        if (!status.ok()) {
+            return status;
+        }
+
+        block.get_by_position(result).column = std::move(col_res);
+        return Status::OK();
+    }
+
+private:
+    /// Concatenates, in document order, the value of every text (PCDATA) and CDATA node
+    /// in the subtree rooted at `root`, including `root` itself.
+    /// The walk is iterative, so stack usage does not grow with the nesting depth of
+    /// the user-supplied document.
+    static std::string get_text(const pugi::xml_node& root) {
+        std::string text;
+        pugi::xml_node cur = root;
+        while (cur) {
+            if (cur.type() == pugi::node_pcdata || cur.type() == pugi::node_cdata) {
+                text += cur.value();
+            }
+            if (pugi::xml_node child = cur.first_child()) {
+                cur = child;
+                continue;
+            }
+            // Leaf reached: climb until a node with an unvisited sibling is found,
+            // without ever leaving the subtree of `root`.
+            while (cur != root && !cur.next_sibling()) {
+                cur = cur.parent();
+            }
+            if (cur == root) {
+                break;
+            }
+            cur = cur.next_sibling();
+        }
+        return text;
+    }
+
+    /// Parses `xml_str` into `xml_doc`, replacing whatever `xml_doc` held before.
+    /// Returns InvalidArgument carrying pugixml's description when parsing fails.
+    static Status parse_xml(const StringRef& xml_str, pugi::xml_document& xml_doc) {
+        pugi::xml_parse_result result = xml_doc.load_buffer(xml_str.data, xml_str.size);
+        if (!result) {
+            return Status::InvalidArgument("Function {} failed to parse XML string: {}", name,
+                                           result.description());
+        }
+        return Status::OK();
+    }
+
+    /// Compiles `xpath_str` into `query`.
+    /// pugixml reports a malformed expression by throwing pugi::xpath_exception. It is
+    /// converted into InvalidArgument here so that it cannot escape from the function.
+    /// Expressions that do not evaluate to a node set (e.g. `count(/a)`) are rejected
+    /// as well, because select_node() would throw for them at evaluation time.
+    static Status compile_xpath(const StringRef& xpath_str,
+                                std::unique_ptr<pugi::xpath_query>& query) {
+        // NOTE!!!: xpath_str may not be null-terminated, so copy it before handing the
+        // pointer to pugixml.
+        const std::string expr = xpath_str.to_string();
+        try {
+            query = std::make_unique<pugi::xpath_query>(expr.c_str());
+        } catch (const pugi::xpath_exception& e) {
+            return Status::InvalidArgument("Function {} failed to parse XPath string: {}", name,
+                                           e.what());
+        }
+        if (query->return_type() != pugi::xpath_type_node_set) {
+            return Status::InvalidArgument(
+                    "Function {} failed to parse XPath string: expression does not evaluate to "
+                    "node set",
+                    name);
+        }
+        return Status::OK();
+    }
+
+    /// Fills `res_col` with one value per input row.
+    /// `left_const` / `right_const` state that xml_col / xpath_col hold a single value
+    /// (at index 0) shared by every row.
+    template <bool left_const, bool right_const>
+    static Status execute_vector(const size_t input_rows_count, const ColumnString& xml_col,
+                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
+        pugi::xml_document xml_doc;
+        std::unique_ptr<pugi::xpath_query> query;
+        StringRef xpath_str;
+        // first check right_const, because we want to check empty input first
+        if constexpr (right_const) {
+            xpath_str = xpath_col.get_data_at(0);
+            if (xpath_str.empty()) {
+                // should return null if xpath_str is empty
+                res_col.insert_many_defaults(input_rows_count);
+                return Status::OK();
+            }
+        }
+        if constexpr (left_const) {
+            auto xml_str = xml_col.get_data_at(0);
+            if (xml_str.empty()) {
+                // should return null if xml_str is empty
+                res_col.insert_many_defaults(input_rows_count);
+                return Status::OK();
+            }
+            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
+        }
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if constexpr (!right_const) {
+                xpath_str = xpath_col.get_data_at(i);
+                if (xpath_str.empty()) {
+                    // should return null if xpath_str is empty
+                    res_col.insert_default();
+                    continue;
+                }
+            }
+            if constexpr (!left_const) {
+                auto xml_str = xml_col.get_data_at(i);
+                if (xml_str.empty()) {
+                    // should return null if xml_str is empty
+                    res_col.insert_default();
+                    continue;
+                }
+                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
+            }
+            // A constant expression is compiled once, on the first row that needs it.
+            // Compiling lazily keeps rows with an empty document yielding NULL even when
+            // the expression is invalid.
+            if (!right_const || query == nullptr) {
+                RETURN_IF_ERROR(compile_xpath(xpath_str, query));
+            }
+            const pugi::xpath_node match = xml_doc.select_node(*query);
+            if (!match) {
+                // should return empty string if not found
+                res_col.insert_data("", 0);
+                continue;
+            }
+            // For an attribute match node() is a null handle, so read the attribute value.
+            const std::string text = match.attribute() ? std::string(match.attribute().value())
+                                                       : get_text(match.node());
+            res_col.insert_data(text.data(), text.size());
+        }
+        return Status::OK();
+    }
+};
+
 } // namespace doris::vectorized
diff --git a/be/test/vec/function/function_string_test.cpp 
b/be/test/vec/function/function_string_test.cpp
index 45a29fbfc05..28d83a4c1b1 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -3374,4 +3374,55 @@ TEST(function_string_test, function_rpad_test) {
     check_function_all_arg_comb<DataTypeString, true>(func_name, input_types, 
data_set);
 }
 
+// Unit test for xpath_string(xml, xpath): every row is {{xml, xpath}, expected}.
+TEST(function_string_test, function_xpath_string_test) {
+    std::string func_name = "xpath_string";
+    BaseInputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
+
+    DataSet data_set = {
+            // plain element paths, including a path that matches nothing
+            {{std::string("<a>123</a>"), std::string("/a")}, std::string("123")},
+            {{std::string("<a><b>123</b></a>"), std::string("/a/b")}, std::string("123")},
+            {{std::string("<a><b>123</b><c>456</c></a>"), std::string("/a/c")}, std::string("456")},
+            {{std::string("<a><b>123</b><c>456</c></a>"), std::string("/a/d")}, std::string("")},
+            // position predicates
+            {{std::string("<a><b>123</b><b>456</b></a>"), std::string("/a/b[1]")},
+             std::string("123")},
+            {{std::string("<a><b>123</b><b>456</b></a>"), std::string("/a/b[2]")},
+             std::string("456")},
+            {{std::string("<a><b>123</b><b>456</b></a>"), std::string("/a/b[3]")}, std::string("")},
+            // attribute predicates
+            {{std::string("<a><b attr='val'>123</b></a>"), std::string("/a/b[@attr]")},
+             std::string("123")},
+            {{std::string("<a><b attr='val'>123</b></a>"), std::string("/a/b[@attr='val']")},
+             std::string("123")},
+            {{std::string("<a><b attr='val'>123</b></a>"), std::string("/a/b[@attr='wrong']")},
+             std::string("")},
+            // comments, CDATA, mixed content, whitespace and empty elements
+            {{std::string("<a><!-- comment -->123</a>"), std::string("/a")}, std::string("123")},
+            {{std::string("<a><![CDATA[123]]></a>"), std::string("/a")}, std::string("123")},
+            {{std::string("<a>123<b>456</b>789</a>"), std::string("/a")}, std::string("123456789")},
+            {{std::string("<a>  123  </a>"), std::string("/a")}, std::string("  123  ")},
+            {{std::string("<a></a>"), std::string("/a")}, std::string("")},
+            {{std::string("<a/>"), std::string("/a")}, std::string("")},
+            // empty-string and NULL arguments both yield NULL
+            {{std::string("<a>123</a>"), std::string("")}, Null()},
+            {{std::string(""), std::string("/a")}, Null()},
+            {{Null(), std::string("/a")}, Null()},
+            {{std::string("<a>123</a>"), Null()}, Null()},
+            // text() selection and whole-subtree concatenation on a larger document
+            {{std::string("<book><title>Intro to Hive</title><author>John "
+                          "Doe</author><publisher>Tech Press</publisher></book>"),
+              std::string("//title/text()")},
+             std::string("Intro to Hive")},
+            {{std::string("<book><title>Intro to Hive</title><author>John "
+                          "Doe</author><publisher>Tech Press</publisher></book>"),
+              std::string("//author/text()")},
+             std::string("John Doe")},
+            {{std::string("<book><title>Intro to Hive</title><author>John "
+                          "Doe</author><publisher>Tech Press</publisher></book>"),
+              std::string("//publisher/text()")},
+             std::string("Tech Press")},
+            {{std::string("<book><title>Intro to Hive</title><author>John "
+                          "Doe</author><publisher>Tech Press</publisher></book>"),
+              std::string("/book")},
+             std::string("Intro to HiveJohn DoeTech Press")},
+            {{Null(), Null()}, Null()}};
+
+    check_function_all_arg_comb<DataTypeString, true>(func_name, input_types, data_set);
+}
+
 } // namespace doris::vectorized
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index c7e39967c0b..4248e5ef907 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -476,6 +476,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksDiff;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksSub;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.WidthBucket;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Xor;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.XpathString;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash32;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash64;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Year;
@@ -976,6 +977,7 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(XxHash32.class, "xxhash_32"),
             scalar(XxHash64.class, "xxhash_64"),
             scalar(Xor.class, "xor"),
+            scalar(XpathString.class, "xpath_string"),
             scalar(Year.class, "year"),
             scalar(YearCeil.class, "year_ceil"),
             scalar(YearFloor.class, "year_floor"),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XpathString.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XpathString.java
new file mode 100644
index 00000000000..734af357d71
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/XpathString.java
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.StringType;
+import org.apache.doris.nereids.types.VarcharType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'xpath_string': a binary, always-nullable string function.
+ */
+public class XpathString extends ScalarFunction
+        implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT)
+                    .args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT),
+            FunctionSignature.ret(StringType.INSTANCE)
+                    .args(StringType.INSTANCE, StringType.INSTANCE)
+    );
+
+    /**
+     * Creates the function from its two operands.
+     *
+     * @param arg0 first operand
+     * @param arg1 second operand
+     */
+    public XpathString(Expression arg0, Expression arg1) {
+        super("xpath_string", arg0, arg1);
+    }
+
+    /**
+     * Rebuilds this function around a new pair of children.
+     *
+     * @param children replacement operands; exactly two are required
+     * @return a new XpathString over the given children
+     */
+    @Override
+    public XpathString withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 2);
+        Expression first = children.get(0);
+        Expression second = children.get(1);
+        return new XpathString(first, second);
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES;
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitXpathString(this, context);
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index 5cfd3b62503..25e9036eddf 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -473,6 +473,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksDiff;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksSub;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.WidthBucket;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Xor;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.XpathString;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash32;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.XxHash64;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Year;
@@ -2334,6 +2335,10 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(xor, context);
     }
 
+    // xpath_string has no dedicated handling by default: fall back to the generic
+    // scalar-function visit.
+    default R visitXpathString(XpathString xpathString, C context) {
+        return visitScalarFunction(xpathString, context);
+    }
+
     // struct function
 
     default R visitCreateStruct(CreateStruct createStruct, C context) {
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_xpath_string.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_xpath_string.out
new file mode 100644
index 00000000000..3451f0737f4
Binary files /dev/null and 
b/regression-test/data/query_p0/sql_functions/string_functions/test_xpath_string.out
 differ
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_xpath_string.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_xpath_string.groovy
new file mode 100644
index 00000000000..c73f70b26f2
--- /dev/null
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_xpath_string.groovy
@@ -0,0 +1,149 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression suite for xpath_string(xml, xpath).
+// Every order_qt_* result is compared with the golden test_xpath_string.out file, so
+// the order of the statements and the exact SQL text must stay as they are.
+suite("test_xpath_string") {
+    sql "drop table if exists xpath_string_args;"
+    sql """
+        create table xpath_string_args (
+            k0 int,
+            xml_not_null string not null,
+            xml_null string null,
+            xpath_not_null string not null,
+            xpath_null string null
+        )
+        DISTRIBUTED BY HASH(k0)
+        PROPERTIES
+        (
+            "replication_num" = "1"
+        );
+    """
+
+    // empty table
+    order_qt_empty_nullable "select xpath_string(xml_null, xpath_null) from xpath_string_args"
+    order_qt_empty_not_nullable "select xpath_string(xml_not_null, xpath_not_null) from xpath_string_args"
+    order_qt_empty_partial_nullable "select xpath_string(xml_null, xpath_not_null), xpath_string(xml_not_null, xpath_null) from xpath_string_args"
+
+    // a single row whose nullable columns are both NULL
+    sql "insert into xpath_string_args values (1, '<a><b>123</b></a>', null, '/a/b', null)"
+
+    order_qt_all_null "select xpath_string(xml_null, xpath_null) from xpath_string_args"
+    order_qt_all_not_null "select xpath_string(xml_not_null, xpath_not_null) from xpath_string_args"
+    order_qt_partial_nullable "select xpath_string(xml_null, xpath_not_null), xpath_string(xml_not_null, xpath_null) from xpath_string_args"
+    order_qt_nullable_no_null "select xpath_string(xml_null, nullable(xpath_not_null)), xpath_string(nullable(xml_not_null), xpath_null) from xpath_string_args"
+
+    sql "truncate table xpath_string_args"
+
+    // many rows mixing NULL and non-NULL values in the nullable columns
+    sql """
+    insert into xpath_string_args values 
+        (2, '<a>123</a>', '<a>456</a>', '/a', '/a'),
+        (3, '<a><b>123</b><c>456</c></a>', null, '/a/c', '/a/b'),
+        (4, '<a><b>123</b><c>456</c></a>', '<a><d>789</d></a>', '/a/d', null),
+        (5, '<a><b>123</b><b>456</b></a>', '<a><b>789</b></a>', '/a/b[1]', '/a/b'),
+        (6, '<a><b>123</b><b>456</b></a>', null, '/a/b[2]', '/a/b[1]'),
+        (7, '<a><b attr="val">123</b></a>', '<a><b attr="other">456</b></a>', '/a/b[@attr]', '/a/b[@attr="val"]'),
+        (8, '<a><!-- comment -->123</a>', '<a>456</a>', '/a', null),
+        (9, '<a><![CDATA[123]]></a>', null, '/a', '/a'),
+        (10, '<a>123<b>456</b>789</a>', '<a><b>test</b></a>', '/a', '/a/b'),
+        (11, '<a>  123  </a>', '<a>456</a>', '/a', null),
+        (12, '<book><title>Intro to Hive</title><author>John Doe</author></book>', 
+            '<book><title>SQL Guide</title></book>', 
+            '//title/text()', 
+            '//author/text()'),
+        (13, '<root><user id="1"><name>Alice</name><age>25</age></user></root>',
+            '<root><user id="2"><name>Bob</name></user></root>',
+            '/root/user[@id="1"]/name',
+            '/root/user/age'),
+        (14, '<products><item price="10.99">Book</item><item price="20.99">Pen</item></products>',
+            null,
+            '/products/item[@price="20.99"]',
+            '/products/item[1]'),
+        (15, '<data><![CDATA[<nested>value</nested>]]></data>',
+            '<data><plain>text</plain></data>',
+            '/data',
+            '//plain/text()'),
+        (16, '<menu><item>Coffee<price>3.99</price></item><item>Tea<price>2.99</price></item></menu>',
+            '<menu><item><price>5.99</price></item></menu>',
+            '//item[price="2.99"]',
+            '/menu/item[1]/price'),
+        (17, '<doc><section id="1">First</section><section id="2">Second</section></doc>',
+            null,
+            '/doc/section[@id="2"]',
+            '/doc/section[1]'),
+        (18, '<list><elem pos="1">A</elem><elem pos="2">B</elem><elem pos="3">C</elem></list>',
+            '<list><elem>X</elem></list>',
+            '/list/elem[@pos="2"]',
+            '/list/elem[last()]'),
+        (19, '<nested><a><b><c>Deep</c></b></a></nested>',
+            '<nested><x><y>Shallow</y></x></nested>',
+            '//c',
+            '/nested/x/y'),
+        (20, '<mixed>Text<b>Bold</b>Normal<i>Italic</i>End</mixed>',
+            '<mixed><b>Only Bold</b></mixed>',
+            '/mixed',
+            '//b/text()'),
+        (21, '<empty></empty>',
+            '<empty/>',
+            '/empty',
+            '/empty/text()')
+    """
+
+    order_qt_all_null "select xpath_string(xml_null, xpath_null) from xpath_string_args"
+    order_qt_all_not_null "select xpath_string(xml_not_null, xpath_not_null) from xpath_string_args"
+    order_qt_partial_nullable "select xpath_string(xml_null, xpath_not_null), xpath_string(xml_not_null, xpath_null) from xpath_string_args"
+    order_qt_nullable_no_null "select xpath_string(xml_null, nullable(xpath_not_null)), xpath_string(nullable(xml_not_null), xpath_null) from xpath_string_args"
+
+    /// consts. most by BE-UT
+    order_qt_const_nullable "select xpath_string(xml_null, NULL), xpath_string(NULL, xpath_null) from xpath_string_args"
+    order_qt_const_not_nullable "select xpath_string(xml_not_null, '/a/b'), xpath_string('<a><b>123</b></a>', xpath_not_null) from xpath_string_args"
+    order_qt_const_partial_nullable "select xpath_string(xml_null, nullable('/a/b')), xpath_string(xml_not_null, nullable(xpath_null)) from xpath_string_args"
+    order_qt_const_nullable_no_null "select xpath_string(nullable(xml_not_null), nullable('/a/b')), xpath_string(nullable('<a><b>123</b></a>'), nullable(xpath_not_null)) from xpath_string_args"
+
+    // literal arguments only
+    order_qt_1 "select xpath_string('', '')"
+    order_qt_2 "select xpath_string(NULL, NULL)"
+    order_qt_3 "select xpath_string(NULL, '/a/b')"
+    order_qt_4 "select xpath_string('<a><b>123</b></a>', NULL)"
+    order_qt_5 "select xpath_string('<a><b>123</b></a>', '/a/b')"
+    order_qt_6 "select xpath_string('<a>123</a>', '/a')"
+    order_qt_7 "select xpath_string('<a><b>123</b><c>456</c></a>', '/a/c')"
+    order_qt_8 "select xpath_string('<a><b>123</b><b>456</b></a>', '/a/b[1]')"
+    order_qt_9 "select xpath_string('<a><b attr=\"val\">123</b></a>', '/a/b[@attr]')"
+    order_qt_10 "select xpath_string('<a><!-- comment -->123</a>', '/a')"
+    order_qt_11 "select xpath_string('<a><![CDATA[123]]></a>', '/a')"
+    order_qt_12 "select xpath_string('<book><title>Intro to Hive</title></book>', '//title/text()')"
+    order_qt_13 "select xpath_string(nullable('<a><b>123</b></a>'), nullable('/a/b'))"
+    order_qt_14 "select xpath_string('<a><b>123</b></a>', nullable('/a/b'))"
+    order_qt_15 "select xpath_string(nullable('<a><b>123</b></a>'), '/a/b')"
+    order_qt_16 "select xpath_string('<root><user id=\"1\"><name>Alice</name></user></root>', '/root/user[@id=\"1\"]/name')"
+    order_qt_17 "select xpath_string('<products><item price=\"10.99\">Book</item></products>', '/products/item[@price=\"10.99\"]')"
+    order_qt_18 "select xpath_string('<menu><item><price>3.99</price></item></menu>', '//item/price/text()')"
+    order_qt_19 "select xpath_string('<data><a>1</a><a>2</a><a>3</a></data>', '/data/a[last()]')"
+    order_qt_20 "select xpath_string('<nested><a><b><c>Deep</c></b></a></nested>', '//c/text()')"
+    order_qt_21 "select xpath_string('<mixed>Text<b>Bold</b>Normal</mixed>', '/mixed/text()')"
+    order_qt_22 "select xpath_string('<doc><item pos=\"1\">First</item></doc>', '/doc/item[@pos=\"1\"]/text()')"
+    order_qt_23 "select xpath_string('<test><a>x</a><b>y</b><c>z</c></test>', '/test/*[2]')"
+    order_qt_24 "select xpath_string('<data><![CDATA[<nested>value</nested>]]></data>', '/data')"
+    order_qt_25 "select xpath_string('<root><elem><!-- comment -->value</elem></root>', '/root/elem')"
+    order_qt_26 "select xpath_string('<doc><section><title>Test</title><para>Text</para></section></doc>', '/doc/section[title=\"Test\"]/para')"
+    order_qt_27 "select xpath_string('<list><item val=\"1\"/><item val=\"2\"/></list>', '/list/item[@val=\"2\"]')"
+    order_qt_28 "select xpath_string('<data><group><name>A</name><value>1</value></group></data>', '/data/group[name=\"A\"]/value')"
+    order_qt_29 "select xpath_string('<root><a><b>1</b></a><a><b>2</b></a></root>', '//a[b=\"2\"]/b')"
+    order_qt_30 "select xpath_string('<doc><p class=\"main\">Content</p></doc>', '//p[@class=\"main\"]/text()')"
+
+    /// error cases:
+    // text that is not XML must fail the query instead of returning NULL
+    test {
+        sql """ select xpath_string('wrong xml', '//a/c') """
+        exception "Function xpath_string failed to parse XML string: No document element found"
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to