This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 6c3d42e09a4 [cherry-pick](branch-21) cherry-pick pr about (#42488) 
(#42099) (#42055) (#42916)
6c3d42e09a4 is described below

commit 6c3d42e09a48dd6c40289d6232b0dac14684a661
Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com>
AuthorDate: Thu Oct 31 14:14:19 2024 +0800

    [cherry-pick](branch-21) cherry-pick pr about (#42488) (#42099) (#42055) 
(#42916)
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 be/src/vec/functions/function_bit_test.cpp         | 156 +++++++
 be/src/vec/functions/function_string.cpp           |   1 +
 be/src/vec/functions/function_string.h             | 116 +++++
 be/src/vec/functions/simple_function_factory.h     |   2 +
 be/src/vec/functions/url/domain.h                  | 127 +++++-
 be/src/vec/functions/url/find_symbols.h            | 481 +++++++++++++++++++++
 be/src/vec/functions/url/function_url.cpp          |  23 +
 be/src/vec/functions/url/functions_url.h           |  11 -
 be/src/vec/functions/url/tldLookup.generated.cpp   | 140 ++++++
 be/src/vec/functions/url/tldLookup.h               |  34 ++
 .../doris/catalog/BuiltinScalarFunctions.java      |  10 +
 .../expressions/functions/scalar/BitTest.java      |  75 ++++
 .../functions/scalar/CountSubstring.java           |  70 +++
 .../scalar/CutToFirstSignificantSubdomain.java     |  68 +++
 .../scalar/FirstSignificantSubdomain.java          |  68 +++
 .../functions/scalar/TopLevelDomain.java           |  68 +++
 .../expressions/visitor/ScalarFunctionVisitor.java |  26 ++
 gensrc/script/doris_builtins_functions.py          |  14 +-
 .../data/correctness_p0/test_bit_test_function.out | 191 ++++++++
 .../string_functions/test_count_substrings.out     | 147 +++++++
 .../string_functions/test_url_functions.out        | 121 ++++++
 .../correctness_p0/test_bit_test_function.groovy   |  91 ++++
 .../string_functions/test_count_substrings.groovy  |  76 ++++
 .../string_functions/test_url_functions.groovy     |  79 ++++
 24 files changed, 2180 insertions(+), 15 deletions(-)

diff --git a/be/src/vec/functions/function_bit_test.cpp 
b/be/src/vec/functions/function_bit_test.cpp
new file mode 100644
index 00000000000..8e010fd9446
--- /dev/null
+++ b/be/src/vec/functions/function_bit_test.cpp
@@ -0,0 +1,156 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <bit>
+#include <bitset>
+
+#include "common/status.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/assert_cast.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/cast_type_to_either.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+/// Scalar function `bit_test(value, pos...)` (also registered as `bit_test_all`).
+/// For every row the result is 1 when each requested bit position is set in `value`,
+/// and 0 otherwise. A position outside [0, bit width of the value type) yields 0.
+class FunctionBitTest : public IFunction {
+public:
+    static constexpr auto name = "bit_test";
+
+    static FunctionPtr create() { return std::make_shared<FunctionBitTest>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 0; }
+
+    bool is_variadic() const override { return true; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeInt8>();
+    }
+
+    /// Dispatches on the integer type of the first argument and stores an Int8 column at
+    /// `result`. Returns RuntimeError when the first argument has no supported integer type,
+    /// or when its column is neither a matching ColumnVector nor a const column.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) const override {
+        bool valid =
+                cast_type(block.get_by_position(arguments[0]).type.get(), [&](const auto& type) {
+                    using DataType = std::decay_t<decltype(type)>;
+                    using T = typename DataType::FieldType;
+                    const auto* first_column = block.get_by_position(arguments[0]).column.get();
+                    const bool is_vector =
+                            check_and_get_column<ColumnVector<T>>(first_column) != nullptr;
+                    if (is_vector || is_column_const(*first_column)) {
+                        execute_inner<T>(block, arguments, result, input_rows_count);
+                        return true;
+                    }
+                    return false;
+                });
+        if (!valid) {
+            return Status::RuntimeError(
+                    "{}'s argument does not match the expected data type, type: {}, column: {}",
+                    get_name(), block.get_by_position(arguments[0]).type->get_name(),
+                    block.get_by_position(arguments[0]).column->dump_structure());
+        }
+        return Status::OK();
+    }
+
+    /// Tries the supported signed integer data types in turn and invokes `f` with the match.
+    template <typename F>
+    static bool cast_type(const IDataType* type, F&& f) {
+        return cast_type_to_either<DataTypeInt8, DataTypeInt16, DataTypeInt32, DataTypeInt64,
+                                   DataTypeInt128>(type, std::forward<F>(f));
+    }
+
+    /// Builds the result column (pre-filled with 1) and picks the evaluation strategy.
+    template <typename T>
+    void execute_inner(Block& block, const ColumnNumbers& arguments, size_t result,
+                       size_t input_rows_count) const {
+        const size_t argument_size = arguments.size();
+        std::vector<ColumnPtr> argument_columns(argument_size);
+        auto result_data_column = ColumnInt8::create(input_rows_count, 1);
+        auto& res_data = result_data_column->get_data();
+
+        // bit_test(column, const) is expected to be the most common call, so only the
+        // two-argument form avoids materializing const columns.
+        if (argument_size == 2) {
+            std::vector<uint8_t> is_consts(argument_size);
+            std::tie(argument_columns[0], is_consts[0]) =
+                    unpack_if_const(block.get_by_position(arguments[0]).column);
+            std::tie(argument_columns[1], is_consts[1]) =
+                    unpack_if_const(block.get_by_position(arguments[1]).column);
+            execute_for_two_argument<T>(argument_columns, is_consts, res_data, input_rows_count);
+        } else {
+            for (size_t i = 0; i < argument_size; ++i) {
+                argument_columns[i] =
+                        block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            }
+            execute_for_others_arg<T>(argument_columns, res_data, argument_size, input_rows_count);
+        }
+
+        block.replace_by_position(result, std::move(result_data_column));
+    }
+
+    /// Two-argument form: res[i] = bit `pos[i]` of `value[i]`, or 0 for an invalid position.
+    template <typename T>
+    void execute_for_two_argument(std::vector<ColumnPtr>& argument_columns,
+                                  std::vector<uint8_t>& is_consts, ColumnInt8::Container& res_data,
+                                  size_t input_rows_count) const {
+        const auto& first_column_data =
+                assert_cast<const ColumnVector<T>&>(*argument_columns[0].get()).get_data();
+        const auto& second_column_data =
+                assert_cast<const ColumnVector<T>&>(*argument_columns[1].get()).get_data();
+        // size_t index: the row count is a size_t, an int index would be compared signed/unsigned.
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            auto first_value = first_column_data[index_check_const(i, is_consts[0])];
+            auto second_value = second_column_data[index_check_const(i, is_consts[1])];
+            // the position is invalid, set result = 0
+            if (second_value < 0 || second_value >= sizeof(T) * 8) {
+                res_data[i] = 0;
+                continue;
+            }
+            res_data[i] = ((first_value >> second_value) & 1);
+        }
+    }
+
+    /// General form: a row stays 1 only if every position argument is valid and its bit is set.
+    /// `res_data` is expected to be pre-filled with 1 (see execute_inner).
+    template <typename T>
+    void execute_for_others_arg(std::vector<ColumnPtr>& argument_columns,
+                                ColumnInt8::Container& res_data, size_t argument_size,
+                                size_t input_rows_count) const {
+        const auto& first_column_data =
+                assert_cast<const ColumnVector<T>&>(*argument_columns[0].get()).get_data();
+
+        // Resolve every position column once instead of re-casting it for each row.
+        std::vector<const typename ColumnVector<T>::Container*> position_columns;
+        position_columns.reserve(argument_size > 0 ? argument_size - 1 : 0);
+        for (size_t col = 1; col < argument_size; ++col) {
+            position_columns.push_back(
+                    &assert_cast<const ColumnVector<T>&>(*argument_columns[col].get()).get_data());
+        }
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            const auto first_value = first_column_data[i];
+            for (const auto* positions : position_columns) {
+                const auto pos = (*positions)[i];
+                // An invalid position or an unset bit decides the row: result is 0, stop early.
+                if (pos < 0 || pos >= sizeof(T) * 8 || !((first_value >> pos) & 1)) {
+                    res_data[i] = 0;
+                    break;
+                }
+            }
+        }
+    }
+};
+
+void register_function_bit_test(SimpleFunctionFactory& factory) { // registers FunctionBitTest with the given factory
+    factory.register_function<FunctionBitTest>();
+    factory.register_alias("bit_test", "bit_test_all"); // presumably lets "bit_test_all" resolve to "bit_test" -- TODO confirm register_alias argument order
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_string.cpp 
b/be/src/vec/functions/function_string.cpp
index edf43300f94..15e977ecbb5 100644
--- a/be/src/vec/functions/function_string.cpp
+++ b/be/src/vec/functions/function_string.cpp
@@ -1175,6 +1175,7 @@ void register_function_string(SimpleFunctionFactory& 
factory) {
     factory.register_function<FunctionFromBase64>();
     factory.register_function<FunctionSplitPart>();
     factory.register_function<FunctionSplitByString>();
+    factory.register_function<FunctionCountSubString>();
     factory.register_function<FunctionSubstringIndex>();
     factory.register_function<FunctionExtractURLParameter>();
     factory.register_function<FunctionStringParseUrl>();
diff --git a/be/src/vec/functions/function_string.h 
b/be/src/vec/functions/function_string.h
index 48887af85f0..256e5943990 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -2733,6 +2733,122 @@ private:
     }
 };
 
+/// Scalar function `count_substrings(str, pattern)`: number of non-overlapping occurrences
+/// of `pattern` in `str`, scanning left to right. Returns 0 when either argument is empty.
+class FunctionCountSubString : public IFunction {
+public:
+    static constexpr auto name = "count_substrings";
+
+    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
+    using NullMapType = PaddedPODArray<UInt8>;
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        DCHECK(is_string(arguments[0]))
+                << "first argument for function: " << name << " should be string"
+                << " and arguments[0] is " << arguments[0]->get_name();
+        DCHECK(is_string(arguments[1]))
+                << "second argument for function: " << name << " should be string"
+                << " and arguments[1] is " << arguments[1]->get_name();
+        return std::make_shared<DataTypeInt32>();
+    }
+
+    /// Writes an Int32 column with one count per row to `result`.
+    /// Returns InternalError when either argument is not backed by a ColumnString.
+    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) const override {
+        DCHECK_EQ(arguments.size(), 2);
+        const auto& [src_column, left_const] =
+                unpack_if_const(block.get_by_position(arguments[0]).column);
+        const auto& [right_column, right_const] =
+                unpack_if_const(block.get_by_position(arguments[1]).column);
+
+        const auto* col_left = check_and_get_column<ColumnString>(src_column.get());
+        if (!col_left) {
+            return Status::InternalError("Left operator of function {} can not be {}", get_name(),
+                                         block.get_by_position(arguments[0]).type->get_name());
+        }
+
+        const auto* col_right = check_and_get_column<ColumnString>(right_column.get());
+        if (!col_right) {
+            return Status::InternalError("Right operator of function {} can not be {}", get_name(),
+                                         block.get_by_position(arguments[1]).type->get_name());
+        }
+
+        auto dest_column_ptr = ColumnInt32::create(input_rows_count, 0);
+        // count_substrings(ColumnString, "xxx")
+        if (right_const) {
+            _execute_constant_pattern(*col_left, col_right->get_data_at(0),
+                                      dest_column_ptr->get_data(), input_rows_count);
+        } else if (left_const) {
+            // count_substrings("xxx", ColumnString)
+            _execute_constant_src_string(col_left->get_data_at(0), *col_right,
+                                         dest_column_ptr->get_data(), input_rows_count);
+        } else {
+            // count_substrings(ColumnString, ColumnString)
+            _execute_vector(*col_left, *col_right, dest_column_ptr->get_data(), input_rows_count);
+        }
+
+        block.replace_by_position(result, std::move(dest_column_ptr));
+        return Status::OK();
+    }
+
+private:
+    /// Same pattern for every row, source string taken per row.
+    void _execute_constant_pattern(const ColumnString& src_column_string,
+                                   const StringRef& pattern_ref,
+                                   ColumnInt32::Container& dest_column_data,
+                                   size_t input_rows_count) const {
+        for (size_t i = 0; i < input_rows_count; i++) {
+            const StringRef str_ref = src_column_string.get_data_at(i);
+            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
+        }
+    }
+
+    /// Source string and pattern both taken per row.
+    void _execute_vector(const ColumnString& src_column_string, const ColumnString& pattern_column,
+                         ColumnInt32::Container& dest_column_data, size_t input_rows_count) const {
+        for (size_t i = 0; i < input_rows_count; i++) {
+            const StringRef pattern_ref = pattern_column.get_data_at(i);
+            const StringRef str_ref = src_column_string.get_data_at(i);
+            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
+        }
+    }
+
+    /// Same source string for every row, pattern taken per row.
+    void _execute_constant_src_string(const StringRef& str_ref, const ColumnString& pattern_col,
+                                      ColumnInt32::Container& dest_column_data,
+                                      size_t input_rows_count) const {
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            const StringRef pattern_ref = pattern_col.get_data_at(i);
+            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
+        }
+    }
+
+    /// Returns the distance from `pos` to the next occurrence of `pattern_ref` in `str_ref`,
+    /// or `str_ref.size - pos` when there is none (the value find_str_count treats as "not found").
+    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
+        const size_t start = pos;
+        const size_t str_size = str_ref.size;
+        // Only positions where the whole pattern still fits inside the source are candidates.
+        // Comparing at later positions would read bytes that lie behind the end of this value
+        // and could report a match that is not part of the string.
+        while (pos + pattern_ref.size <= str_size) {
+            if (!memcmp_small_allow_overflow15(str_ref.data + pos, pattern_ref.data,
+                                               pattern_ref.size)) {
+                return pos - start;
+            }
+            pos++;
+        }
+        return str_size - start;
+    }
+
+    /// Counts non-overlapping matches: after each hit the scan resumes behind the matched text.
+    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
+        int count = 0;
+        if (str_ref.size == 0 || pattern_ref.size == 0) {
+            return 0;
+        } else {
+            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
+                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
+                if (res_pos == (str_ref.size - str_pos)) {
+                    break; // not found
+                }
+                count++;
+                str_pos = str_pos + res_pos + pattern_ref.size;
+            }
+        }
+        return count;
+    }
+};
+
 struct SM3Sum {
     static constexpr auto name = "sm3sum";
     using ObjectData = SM3Digest;
diff --git a/be/src/vec/functions/simple_function_factory.h 
b/be/src/vec/functions/simple_function_factory.h
index d164e40abb1..a859a4685e2 100644
--- a/be/src/vec/functions/simple_function_factory.h
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -106,6 +106,7 @@ void register_function_tokenize(SimpleFunctionFactory& 
factory);
 void register_function_url(SimpleFunctionFactory& factory);
 void register_function_ip(SimpleFunctionFactory& factory);
 void register_function_multi_match(SimpleFunctionFactory& factory);
+void register_function_bit_test(SimpleFunctionFactory& factory);
 
 class SimpleFunctionFactory {
     using Creator = std::function<FunctionBuilderPtr()>;
@@ -297,6 +298,7 @@ public:
             register_function_ignore(instance);
             register_function_variant_element(instance);
             register_function_multi_match(instance);
+            register_function_bit_test(instance);
         });
         return instance;
     }
diff --git a/be/src/vec/functions/url/domain.h 
b/be/src/vec/functions/url/domain.h
index 54361134eff..b2ec5e0c9d9 100644
--- a/be/src/vec/functions/url/domain.h
+++ b/be/src/vec/functions/url/domain.h
@@ -20,11 +20,12 @@
 
 #pragma once
 
-// #include <base/find_symbols.h>
 #include <cstring>
 
 #include "vec/common/string_utils/string_utils.h"
+#include "vec/functions/url/find_symbols.h"
 #include "vec/functions/url/protocol.h"
+#include "vec/functions/url/tldLookup.h"
 
 namespace doris::vectorized {
 
@@ -144,4 +145,128 @@ struct ExtractDomain {
     }
 };
 
+/// Extracts the top-level domain of a URL: the text behind the last dot of its host.
+/// The result is empty when there is no host, no dot in the host, or the last label
+/// starts with a digit (IPv4 address).
+struct ExtractTopLevelDomain {
+    static size_t get_reserve_length_for_element() { return 5; }
+
+    static void execute(const char* data, size_t size, const char*& res_data, size_t& res_size) {
+        // Default answer: an empty slice anchored at the input.
+        res_data = data;
+        res_size = 0;
+
+        StringRef host = get_url_host(data, size);
+        if (host.size == 0) {
+            return;
+        }
+
+        // Ignore one trailing dot ("example.com." -> "example.com").
+        auto trimmed_host = host.to_string_view();
+        if (trimmed_host.back() == '.') {
+            trimmed_host.remove_suffix(1);
+        }
+
+        const auto* trimmed_end = trimmed_host.data() + trimmed_host.size();
+        const char* final_dot = find_last_symbols_or_null<'.'>(trimmed_host.data(), trimmed_end);
+        if (final_dot == nullptr) {
+            return;
+        }
+
+        /// For IPv4 addresses select nothing.
+        ///
+        /// NOTE: reading final_dot[1] stays inside the host buffer: the dot is never the last
+        /// byte of the untrimmed host, because a trailing dot has been cut off above.
+        if (is_numeric_ascii(final_dot[1])) {
+            return;
+        }
+
+        res_data = final_dot + 1;
+        res_size = trimmed_end - res_data;
+    }
+};
+
+/// Selects the "first significant subdomain" inside the domain of a URL.
+///  - no dot in the domain: the whole domain;
+///  - exactly one dot: the label in front of it;
+///  - otherwise: if the text behind the second-to-last dot is accepted by tldLookup::is_valid
+///    (a two-label suffix), the label in front of that suffix, else the second-to-last label.
+/// When `out_domain_end` is given it receives the end of the domain as returned by
+/// ExtractDomain (before a trailing dot is dropped).
+struct ExtractFirstSignificantSubdomain {
+    static size_t get_reserve_length_for_element() { return 10; }
+
+    static void execute(const Pos data, const size_t size, Pos& res_data, size_t& res_size,
+                        Pos* out_domain_end = nullptr) {
+        res_data = data;
+        res_size = 0;
+
+        Pos tmp;
+        size_t domain_length = 0;
+        ExtractDomain<true>::execute(data, size, tmp, domain_length);
+
+        if (domain_length == 0) {
+            return;
+        }
+        if (out_domain_end) {
+            *out_domain_end = tmp + domain_length;
+        }
+
+        /// cut useless dot
+        if (tmp[domain_length - 1] == '.') {
+            --domain_length;
+        }
+
+        res_data = tmp;
+        res_size = domain_length;
+
+        const auto* begin = tmp;
+        const auto* end = begin + domain_length;
+        // Positions of the last three dots: [0] is the last one, [2] the third from the end.
+        std::array<const char*, 3> last_periods {};
+
+        const auto* pos = find_first_symbols<'.'>(begin, end);
+        while (pos < end) {
+            last_periods[2] = last_periods[1];
+            last_periods[1] = last_periods[0];
+            last_periods[0] = pos;
+            pos = find_first_symbols<'.'>(pos + 1, end);
+        }
+
+        if (!last_periods[0]) {
+            return;
+        }
+
+        if (!last_periods[1]) {
+            res_size = last_periods[0] - begin;
+            return;
+        }
+
+        // With only two dots, act as if a third one sat directly in front of the domain.
+        if (!last_periods[2]) {
+            last_periods[2] = begin - 1;
+        }
+
+        // find_first_symbols returns `end` (never nullptr) when there is no '/',
+        // so no null check is needed here.
+        const auto* end_of_level_domain = find_first_symbols<'/'>(last_periods[0], end);
+
+        auto host_len = static_cast<size_t>(end_of_level_domain - last_periods[1] - 1);
+        StringRef host {last_periods[1] + 1, host_len};
+        if (tldLookup::is_valid(host.data, host.size)) {
+            res_data += last_periods[2] + 1 - begin;
+            res_size = last_periods[1] - last_periods[2] - 1;
+        } else {
+            res_data += last_periods[1] + 1 - begin;
+            res_size = last_periods[0] - last_periods[1] - 1;
+        }
+    }
+};
+
+/// Returns the part of the URL's domain that starts at its first significant subdomain
+/// (see ExtractFirstSignificantSubdomain) and runs to the end of the domain.
+/// The result is empty when no significant subdomain is found.
+struct CutToFirstSignificantSubdomain {
+    static size_t get_reserve_length_for_element() { return 15; }
+
+    static void execute(const Pos data, const size_t size, Pos& res_data, size_t& res_size) {
+        Pos subdomain_begin = data;
+        size_t subdomain_length;
+        Pos domain_end = data;
+        ExtractFirstSignificantSubdomain::execute(data, size, subdomain_begin, subdomain_length,
+                                                  &domain_end);
+
+        // Nothing significant found: empty slice anchored at the input.
+        const bool found = subdomain_length != 0;
+        res_data = found ? subdomain_begin : data;
+        res_size = found ? static_cast<size_t>(domain_end - subdomain_begin) : 0;
+    }
+};
 } // namespace doris::vectorized
diff --git a/be/src/vec/functions/url/find_symbols.h 
b/be/src/vec/functions/url/find_symbols.h
new file mode 100644
index 00000000000..7af95ce06bd
--- /dev/null
+++ b/be/src/vec/functions/url/find_symbols.h
@@ -0,0 +1,481 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/base/base/find_symbols.h
+// and modified by Doris
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <stdexcept>
+#include <string>
+#include <utility>
+
+#if defined(__SSE2__)
+#include <emmintrin.h>
+#endif
+
+#if defined(__SSE4_2__)
+#include <nmmintrin.h>
+#endif
+
+/** find_first_symbols<c1, c2, ...>(begin, end):
+  *
+  * Searches a string for the next character that belongs to the set 'symbols...'.
+  * It is similar to 'strpbrk' and 'strcspn' (and to 'strchr' / 'memchr' in the case of one
+  * symbol and '\0'), but with the following differences:
+  * - works with any memory range, including ranges that contain zero bytes;
+  * - doesn't require a terminating zero byte: the end of the memory range is passed explicitly;
+  * - if nothing is found, returns a pointer to end instead of nullptr;
+  * - the maximum number of symbols to search for is 16.
+  *
+  * Uses SSE 2 when the number of symbols is small and SSE 4.2 when it is large, which gives
+  * more than a 2x performance advantage over a trivial loop when parsing a tab-separated dump
+  * with (probably escaped) string fields.
+  * When parsing a tab-separated dump with short strings, there is no performance degradation
+  * compared to a trivial loop.
+  *
+  * Note: the optimal threshold to choose between SSE 2 and SSE 4.2 may depend on the CPU model.
+  *
+  * find_last_symbols_or_null<c1, c2, ...>(begin, end):
+  *
+  * Searches a string for the last matching character.
+  * If there is no such character, returns nullptr.
+  */
+
+/// Runtime-provided set of symbols to search for.
+/// `str` always holds the symbols as given. When SSE 4.2 is available the symbols are
+/// additionally kept zero-padded in `simd_vector`, which limits the set to BUFFER_SIZE
+/// characters (std::runtime_error is thrown for longer input).
+struct SearchSymbols {
+    static constexpr auto BUFFER_SIZE = 16;
+
+    SearchSymbols() = default;
+
+    explicit SearchSymbols(std::string in) : str(std::move(in)) {
+#if defined(__SSE4_2__)
+        if (str.size() > BUFFER_SIZE) {
+            throw std::runtime_error("SearchSymbols can contain at most " +
+                                     std::to_string(BUFFER_SIZE) + " symbols and " +
+                                     std::to_string(str.size()) + " was provided\n");
+        }
+
+        // Stage the symbols in a full 16-byte buffer so the unaligned load below never
+        // reads beyond the string's own storage.
+        char tmp_safety_buffer[BUFFER_SIZE] = {0};
+
+        std::memcpy(tmp_safety_buffer, str.data(), str.size());
+
+        simd_vector = _mm_loadu_si128(reinterpret_cast<const __m128i*>(tmp_safety_buffer));
+#endif
+    }
+
+#if defined(__SSE4_2__)
+    // Zero by default so that a default-constructed object never carries an indeterminate value.
+    __m128i simd_vector = _mm_setzero_si128();
+#endif
+    std::string str;
+};
+
+namespace detail {
+template <char... chars>
+constexpr bool is_in(char x) { // true when x equals at least one of the template characters
+    return ((x == chars) || ...);
+} // NOLINT(misc-redundant-expression)
+
+static bool is_in(char c, const char* symbols, size_t num_chars) { // runtime variant: linear scan of symbols[0, num_chars)
+    for (size_t i = 0U; i < num_chars; ++i) {
+        if (c == symbols[i]) {
+            return true;
+        }
+    }
+
+    return false; // c matched none of the symbols (also the answer for num_chars == 0)
+}
+
+#if defined(__SSE2__)
+template <char s0>
+inline __m128i mm_is_in(__m128i bytes) { // single-symbol case: per-byte mask, 0xFF where the byte equals s0
+    __m128i eq0 = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(s0));
+    return eq0;
+}
+
+template <char s0, char s1, char... tail>
+inline __m128i mm_is_in(__m128i bytes) { // multi-symbol case: OR of the per-symbol equality masks
+    __m128i eq0 = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(s0));
+    __m128i eq = mm_is_in<s1, tail...>(bytes); // recurse over the remaining symbols
+    return _mm_or_si128(eq0, eq);
+}
+
+inline __m128i mm_is_in(__m128i bytes, const char* symbols, size_t num_chars) { // runtime symbol set: OR of equality masks for symbols[0, num_chars)
+    __m128i accumulator = _mm_setzero_si128(); // stays all-zero when num_chars == 0
+    for (size_t i = 0; i < num_chars; ++i) {
+        __m128i eq = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(symbols[i]));
+        accumulator = _mm_or_si128(accumulator, eq);
+    }
+
+    return accumulator;
+}
+
+/// Broadcasts each symbol of `symbols[0, num_chars)` into its own 128-bit needle for
+/// mm_is_in_execute(). At most 16 symbols are used; any further ones are ignored here.
+inline std::array<__m128i, 16u> mm_is_in_prepare(const char* symbols, size_t num_chars) {
+    std::array<__m128i, 16u> result {};
+    if (num_chars == 0) {
+        return result;
+    }
+
+    // Never write past the 16 available slots.
+    const size_t used = num_chars < result.size() ? num_chars : result.size();
+    for (size_t i = 0; i < used; ++i) {
+        result[i] = _mm_set1_epi8(symbols[i]);
+    }
+
+    // mm_is_in_execute() compares against all 16 needles. A slot left zero-initialized would
+    // make every NUL byte of the haystack look like a match, so repeat the first symbol instead.
+    for (size_t i = used; i < result.size(); ++i) {
+        result[i] = result[0];
+    }
+
+    return result;
+}
+
+inline __m128i mm_is_in_execute(__m128i bytes, const std::array<__m128i, 16u>& 
needles) {
+    __m128i accumulator = _mm_setzero_si128(); // per-byte mask: 0xFF where a byte of `bytes` equals some needle
+
+    for (const auto& needle : needles) { // always visits all 16 slots, however many symbols were prepared
+        __m128i eq = _mm_cmpeq_epi8(bytes, needle);
+        accumulator = _mm_or_si128(accumulator, eq);
+    }
+
+    return accumulator;
+}
+#endif
+
+template <bool positive>
+constexpr bool maybe_negate(bool x) { // returns x for positive == true, !x otherwise
+    return x == positive;
+}
+
+template <bool positive>
+constexpr uint16_t maybe_negate(uint16_t x) { // bit-mask variant: x unchanged, or all 16 bits flipped when positive == false
+    if constexpr (positive)
+        return x;
+    else
+        return ~x; // ~ promotes to int; the uint16_t return type truncates back to 16 bits
+}
+
+enum class ReturnMode : uint8_t { // what the search functions return when nothing matches
+    End, // return the `end` pointer
+    Nullptr, // return nullptr
+};
+
+template <bool positive, ReturnMode return_mode, char... symbols> // positive == false searches for the first byte NOT in `symbols`
+inline const char* find_first_symbols_sse2(const char* const begin, const 
char* const end) {
+    const char* pos = begin; // scan cursor, moves forward from begin
+
+#if defined(__SSE2__)
+    for (; pos + 15 < end; pos += 16) { // vector path: only while a full 16-byte chunk remains
+        __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
+
+        __m128i eq = mm_is_in<symbols...>(bytes);
+
+        uint16_t bit_mask = 
maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq)));
+        if (bit_mask) return pos + __builtin_ctz(bit_mask); // lowest set bit = first matching byte of the chunk
+    }
+#endif
+
+    for (; pos < end; ++pos) // scalar path for the remaining (fewer than 16) bytes, or everything without SSE2
+        if (maybe_negate<positive>(is_in<symbols...>(*pos))) return pos;
+
+    return return_mode == ReturnMode::End ? end : nullptr; // nothing matched
+}
+
+template <bool positive, ReturnMode return_mode> // runtime symbol set variant; positive == false searches for the first byte NOT in the set
+inline const char* find_first_symbols_sse2(const char* const begin, const 
char* const end,
+                                           const char* symbols, size_t 
num_chars) {
+    const char* pos = begin; // scan cursor, moves forward from begin
+
+#if defined(__SSE2__)
+    const auto needles = mm_is_in_prepare(symbols, num_chars); // broadcast the symbols once, outside the loop
+    for (; pos + 15 < end; pos += 16) { // vector path: only while a full 16-byte chunk remains
+        __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
+
+        __m128i eq = mm_is_in_execute(bytes, needles);
+
+        uint16_t bit_mask = 
maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq)));
+        if (bit_mask) return pos + __builtin_ctz(bit_mask); // lowest set bit = first matching byte of the chunk
+    }
+#endif
+
+    for (; pos < end; ++pos) // scalar path for the remaining bytes, or everything without SSE2
+        if (maybe_negate<positive>(is_in(*pos, symbols, num_chars))) return 
pos;
+
+    return return_mode == ReturnMode::End ? end : nullptr; // nothing matched
+}
+
+/// Searches [begin, end) backwards and returns a pointer to the last byte that is in
+/// `symbols` (or, with positive == false, the last byte that is NOT in `symbols`).
+/// When nothing matches, returns `end` or nullptr depending on `return_mode`.
+template <bool positive, ReturnMode return_mode, char... symbols>
+inline const char* find_last_symbols_sse2(const char* const begin, const char* const end) {
+    const char* pos = end;
+
+#if defined(__SSE2__)
+    // Compare distances instead of computing `pos - 16` up front: for ranges shorter than
+    // 16 bytes that pointer would lie before `begin`, which is not valid pointer arithmetic.
+    for (; pos - begin >= 16; pos -= 16) {
+        __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos - 16));
+
+        __m128i eq = mm_is_in<symbols...>(bytes);
+
+        uint16_t bit_mask = maybe_negate<positive>(uint16_t(_mm_movemask_epi8(eq)));
+        if (bit_mask) {
+            /// __builtin_clz works with the mask as uint32, hence the correction by 16.
+            return pos - 1 - (__builtin_clz(bit_mask) - 16);
+        }
+    }
+#endif
+
+    // Scalar path for the remaining bytes; the cursor never moves below `begin`.
+    while (pos > begin) {
+        --pos;
+        if (maybe_negate<positive>(is_in<symbols...>(*pos))) return pos;
+    }
+
+    return return_mode == ReturnMode::End ? end : nullptr;
+}
+
+/// SSE4.2 forward search with a compile-time symbol set: the first `num_chars` of
+/// c01..c16 are the symbols, the remaining slots default to 0.
+///
+/// Returns a pointer to the first accepted byte in [begin, end). If there is none,
+/// returns `end` for ReturnMode::End and nullptr for any other return mode.
+/// The scalar tail only recognises `num_chars` in 1..16.
+template <bool positive, ReturnMode return_mode, size_t num_chars, char c01, char c02 = 0,
+          char c03 = 0, char c04 = 0, char c05 = 0, char c06 = 0, char c07 = 0, char c08 = 0,
+          char c09 = 0, char c10 = 0, char c11 = 0, char c12 = 0, char c13 = 0, char c14 = 0,
+          char c15 = 0, char c16 = 0>
+inline const char* find_first_symbols_sse42(const char* const begin, const char* const end) {
+    const char* pos = begin;
+
+#if defined(__SSE4_2__)
+    constexpr int mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT;
+
+    const __m128i set = _mm_setr_epi8(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11,
+                                      c12, c13, c14, c15, c16);
+
+    /// Compare the remaining length instead of forming `pos + 15`: that pointer would lie
+    /// past the end of the buffer (undefined behaviour) when fewer than 15 bytes are left.
+    for (; end - pos >= 16; pos += 16) {
+        const __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
+
+        if constexpr (positive) {
+            if (_mm_cmpestrc(set, num_chars, bytes, 16, mode))
+                return pos + _mm_cmpestri(set, num_chars, bytes, 16, mode);
+        } else {
+            if (_mm_cmpestrc(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY))
+                return pos +
+                       _mm_cmpestri(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY);
+        }
+    }
+#endif
+
+    /// Scalar tail (and the whole range without SSE4.2): test against exactly the first
+    /// `num_chars` symbols, so the zero-filled unused slots never match.
+    for (; pos < end; ++pos)
+        if ((num_chars == 1 && maybe_negate<positive>(is_in<c01>(*pos))) ||
+            (num_chars == 2 && maybe_negate<positive>(is_in<c01, c02>(*pos))) ||
+            (num_chars == 3 && maybe_negate<positive>(is_in<c01, c02, c03>(*pos))) ||
+            (num_chars == 4 && maybe_negate<positive>(is_in<c01, c02, c03, c04>(*pos))) ||
+            (num_chars == 5 && maybe_negate<positive>(is_in<c01, c02, c03, c04, c05>(*pos))) ||
+            (num_chars == 6 &&
+             maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06>(*pos))) ||
+            (num_chars == 7 &&
+             maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07>(*pos))) ||
+            (num_chars == 8 &&
+             maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08>(*pos))) ||
+            (num_chars == 9 &&
+             maybe_negate<positive>(
+                     is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09>(*pos))) ||
+            (num_chars == 10 &&
+             maybe_negate<positive>(
+                     is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10>(*pos))) ||
+            (num_chars == 11 &&
+             maybe_negate<positive>(
+                     is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11>(*pos))) ||
+            (num_chars == 12 &&
+             maybe_negate<positive>(
+                     is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12>(
+                             *pos))) ||
+            (num_chars == 13 &&
+             maybe_negate<positive>(
+                     is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13>(
+                             *pos))) ||
+            (num_chars == 14 &&
+             maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10,
+                                          c11, c12, c13, c14>(*pos))) ||
+            (num_chars == 15 &&
+             maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10,
+                                          c11, c12, c13, c14, c15>(*pos))) ||
+            (num_chars == 16 &&
+             maybe_negate<positive>(is_in<c01, c02, c03, c04, c05, c06, c07, c08, c09, c10,
+                                          c11, c12, c13, c14, c15, c16>(*pos))))
+            return pos;
+    return return_mode == ReturnMode::End ? end : nullptr;
+}
+
+/// SSE4.2 forward search with a runtime symbol set: `symbols.str` holds the characters and
+/// `symbols.simd_vector` is used as the 16-byte needle register.
+///
+/// Returns a pointer to the first accepted byte in [begin, end). If there is none,
+/// returns `end` for ReturnMode::End and nullptr for any other return mode.
+template <bool positive, ReturnMode return_mode>
+inline const char* find_first_symbols_sse42(const char* const begin, const char* const end,
+                                            const SearchSymbols& symbols) {
+    const char* pos = begin;
+
+    const auto num_chars = symbols.str.size();
+
+#if defined(__SSE4_2__)
+    constexpr int mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT;
+
+    const __m128i set = symbols.simd_vector;
+
+    /// Compare the remaining length instead of forming `pos + 15`: that pointer would lie
+    /// past the end of the buffer (undefined behaviour) when fewer than 15 bytes are left.
+    for (; end - pos >= 16; pos += 16) {
+        const __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos));
+
+        if constexpr (positive) {
+            if (_mm_cmpestrc(set, num_chars, bytes, 16, mode))
+                return pos + _mm_cmpestri(set, num_chars, bytes, 16, mode);
+        } else {
+            if (_mm_cmpestrc(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY))
+                return pos +
+                       _mm_cmpestri(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY);
+        }
+    }
+#endif
+
+    /// Scalar tail (and the whole range when SSE4.2 is not available at compile time).
+    for (; pos < end; ++pos)
+        if (maybe_negate<positive>(is_in(*pos, symbols.str.data(), num_chars))) return pos;
+
+    return return_mode == ReturnMode::End ? end : nullptr;
+}
+
+/// NOTE: There is no SSE 4.2 implementation of find_last_symbols_or_null; it is not worth the effort.
+
+/// Picks the implementation for a compile-time symbol set of at most 16 characters:
+/// the SSE4.2 variant when SSE4.2 is enabled at compile time and the set has five or
+/// more symbols, otherwise the SSE2 variant.
+///
+/// The choice depends only on compile-time constants, so `if constexpr` is used and the
+/// variant that is not selected is never instantiated.
+template <bool positive, ReturnMode return_mode, char... symbols>
+inline const char* find_first_symbols_dispatch(const char* begin, const char* end)
+    requires(sizeof...(symbols) <= 16) /// sizeof... is unsigned, so no lower bound is needed
+{
+#if defined(__SSE4_2__)
+    if constexpr (sizeof...(symbols) >= 5)
+        return find_first_symbols_sse42<positive, return_mode, sizeof...(symbols), symbols...>(
+                begin, end);
+    else
+#endif
+        return find_first_symbols_sse2<positive, return_mode, symbols...>(begin, end);
+}
+
+/// Picks the implementation for a runtime symbol set: the SSE4.2 variant when SSE4.2 is
+/// enabled at compile time and the set has five or more symbols, otherwise the SSE2 variant.
+///
+/// The haystack bounds are taken from data()/size(): std::string_view::begin()/end() return
+/// an implementation-defined iterator type that is not guaranteed to convert to const char*.
+template <bool positive, ReturnMode return_mode>
+inline const char* find_first_symbols_dispatch(const std::string_view haystack,
+                                               const SearchSymbols& symbols) {
+    const char* const begin = haystack.data();
+    const char* const end = begin + haystack.size();
+#if defined(__SSE4_2__)
+    if (symbols.str.size() >= 5)
+        return find_first_symbols_sse42<positive, return_mode>(begin, end, symbols);
+    else
+#endif
+        return find_first_symbols_sse2<positive, return_mode>(begin, end, symbols.str.data(),
+                                                              symbols.str.size());
+}
+
+} // namespace detail
+
+/// Forward search in [begin, end) for the first byte that is one of `symbols`.
+/// Yields `end` when nothing is found.
+template <char... symbols>
+inline const char* find_first_symbols(const char* begin, const char* end) {
+    constexpr bool match_symbols = true;
+    constexpr auto on_miss = detail::ReturnMode::End;
+    return detail::find_first_symbols_dispatch<match_symbols, on_miss, symbols...>(begin, end);
+}
+
+/// Mutable-buffer overload: same search as the const version, but the result is handed
+/// back as `char*` so callers can keep iterating over (and writing to) their own buffer.
+template <char... symbols>
+inline char* find_first_symbols(char* begin, char* end) {
+    const char* const found =
+            detail::find_first_symbols_dispatch<true, detail::ReturnMode::End, symbols...>(
+                    begin, end);
+    return const_cast<char*>(found);
+}
+
+/// Forward search in `haystack` for the first byte contained in the runtime set `symbols`.
+/// Yields the end of the haystack when nothing is found.
+inline const char* find_first_symbols(std::string_view haystack, const SearchSymbols& symbols) {
+    constexpr bool match_symbols = true;
+    constexpr auto on_miss = detail::ReturnMode::End;
+    return detail::find_first_symbols_dispatch<match_symbols, on_miss>(haystack, symbols);
+}
+
+/// Forward search in [begin, end) with the match negated (`positive == false`), i.e. for
+/// the first byte that is not one of `symbols`. Yields `end` when nothing is found.
+template <char... symbols>
+inline const char* find_first_not_symbols(const char* begin, const char* end) {
+    constexpr bool match_symbols = false;
+    constexpr auto on_miss = detail::ReturnMode::End;
+    return detail::find_first_symbols_dispatch<match_symbols, on_miss, symbols...>(begin, end);
+}
+
+/// Mutable-buffer overload of the negated forward search; the result is returned as `char*`.
+/// Yields `end` when nothing is found.
+template <char... symbols>
+inline char* find_first_not_symbols(char* begin, char* end) {
+    const char* const found =
+            detail::find_first_symbols_dispatch<false, detail::ReturnMode::End, symbols...>(
+                    begin, end);
+    return const_cast<char*>(found);
+}
+
+/// Negated forward search in `haystack` against the runtime set `symbols`.
+/// Yields the end of the haystack when nothing is found.
+inline const char* find_first_not_symbols(std::string_view haystack, const SearchSymbols& symbols) {
+    constexpr bool match_symbols = false;
+    constexpr auto on_miss = detail::ReturnMode::End;
+    return detail::find_first_symbols_dispatch<match_symbols, on_miss>(haystack, symbols);
+}
+
+/// Forward search in [begin, end) for the first byte that is one of `symbols`.
+/// Yields nullptr (not `end`) when nothing is found.
+template <char... symbols>
+inline const char* find_first_symbols_or_null(const char* begin, const char* end) {
+    constexpr bool match_symbols = true;
+    constexpr auto on_miss = detail::ReturnMode::Nullptr;
+    return detail::find_first_symbols_dispatch<match_symbols, on_miss, symbols...>(begin, end);
+}
+
+/// Mutable-buffer overload of find_first_symbols_or_null; the result is returned as `char*`.
+/// Yields nullptr when nothing is found.
+template <char... symbols>
+inline char* find_first_symbols_or_null(char* begin, char* end) {
+    const char* const found =
+            detail::find_first_symbols_dispatch<true, detail::ReturnMode::Nullptr, symbols...>(
+                    begin, end);
+    return const_cast<char*>(found);
+}
+
+/// Forward search in `haystack` for the first byte contained in the runtime set `symbols`.
+/// Yields nullptr when nothing is found.
+inline const char* find_first_symbols_or_null(std::string_view haystack,
+                                              const SearchSymbols& symbols) {
+    constexpr bool match_symbols = true;
+    constexpr auto on_miss = detail::ReturnMode::Nullptr;
+    return detail::find_first_symbols_dispatch<match_symbols, on_miss>(haystack, symbols);
+}
+
+/// Negated forward search in [begin, end): looks for the first byte that is not one of
+/// `symbols`. Yields nullptr when nothing is found.
+template <char... symbols>
+inline const char* find_first_not_symbols_or_null(const char* begin, const char* end) {
+    constexpr bool match_symbols = false;
+    constexpr auto on_miss = detail::ReturnMode::Nullptr;
+    return detail::find_first_symbols_dispatch<match_symbols, on_miss, symbols...>(begin, end);
+}
+
+/// Mutable-buffer overload of find_first_not_symbols_or_null; the result is returned as
+/// `char*`. Yields nullptr when nothing is found.
+template <char... symbols>
+inline char* find_first_not_symbols_or_null(char* begin, char* end) {
+    const char* const found =
+            detail::find_first_symbols_dispatch<false, detail::ReturnMode::Nullptr, symbols...>(
+                    begin, end);
+    return const_cast<char*>(found);
+}
+
+/// Negated forward search in `haystack` against the runtime set `symbols`.
+/// Yields nullptr when nothing is found.
+inline const char* find_first_not_symbols_or_null(std::string_view haystack,
+                                                  const SearchSymbols& symbols) {
+    constexpr bool match_symbols = false;
+    constexpr auto on_miss = detail::ReturnMode::Nullptr;
+    return detail::find_first_symbols_dispatch<match_symbols, on_miss>(haystack, symbols);
+}
+
+/// Backward search in [begin, end) for the last byte that is one of `symbols`.
+/// Yields nullptr when nothing is found.
+template <char... symbols>
+inline const char* find_last_symbols_or_null(const char* begin, const char* end) {
+    constexpr bool match_symbols = true;
+    constexpr auto on_miss = detail::ReturnMode::Nullptr;
+    return detail::find_last_symbols_sse2<match_symbols, on_miss, symbols...>(begin, end);
+}
+
+/// Mutable-buffer overload of find_last_symbols_or_null; the result is returned as `char*`.
+/// Yields nullptr when nothing is found.
+template <char... symbols>
+inline char* find_last_symbols_or_null(char* begin, char* end) {
+    const char* const found =
+            detail::find_last_symbols_sse2<true, detail::ReturnMode::Nullptr, symbols...>(begin,
+                                                                                          end);
+    return const_cast<char*>(found);
+}
+
+/// Negated backward search in [begin, end): looks for the last byte that is not one of
+/// `symbols`. Yields nullptr when nothing is found.
+template <char... symbols>
+inline const char* find_last_not_symbols_or_null(const char* begin, const char* end) {
+    constexpr bool match_symbols = false;
+    constexpr auto on_miss = detail::ReturnMode::Nullptr;
+    return detail::find_last_symbols_sse2<match_symbols, on_miss, symbols...>(begin, end);
+}
+
+/// Mutable-buffer overload of find_last_not_symbols_or_null; the result is returned as
+/// `char*`. Yields nullptr when nothing is found.
+template <char... symbols>
+inline char* find_last_not_symbols_or_null(char* begin, char* end) {
+    const char* const found =
+            detail::find_last_symbols_sse2<false, detail::ReturnMode::Nullptr, symbols...>(begin,
+                                                                                           end);
+    return const_cast<char*>(found);
+}
+
+/// A small stand-in for boost::split, which triggers a false positive in the clang static
+/// analyzer (see https://github.com/boostorg/algorithm/issues/63 and
+/// https://bugs.llvm.org/show_bug.cgi?id=41141).
+///
+/// Cuts `what` at every byte that is one of `symbols` and appends each piece to `to` via
+/// `to.emplace_back(pointer, length)`. With `token_compress` set, empty pieces are skipped.
+/// Returns `to`.
+template <char... symbols, typename To>
+inline To& splitInto(To& to, std::string_view what, bool token_compress = false) {
+    const char* cursor = what.data();
+    const char* stop = cursor + what.size();
+    while (cursor < stop) {
+        const char* token_end = find_first_symbols<symbols...>(cursor, stop);
+
+        const bool token_is_empty = !(cursor < token_end);
+        if (!(token_compress && token_is_empty)) {
+            to.emplace_back(cursor, token_end - cursor);
+        }
+
+        /// Step over the delimiter when one was found; otherwise we are at the end.
+        cursor = (token_end < stop) ? token_end + 1 : token_end;
+    }
+
+    return to;
+}
diff --git a/be/src/vec/functions/url/function_url.cpp 
b/be/src/vec/functions/url/function_url.cpp
index e25af6f7f27..47afe076b74 100644
--- a/be/src/vec/functions/url/function_url.cpp
+++ b/be/src/vec/functions/url/function_url.cpp
@@ -46,10 +46,33 @@ struct NameProtocol {
 using FunctionProtocol =
         FunctionStringToString<ExtractSubstringImpl<ExtractProtocol>, 
NameProtocol>;
 
+struct NameTopLevelDomain { // Name tag: passed as the second template argument of FunctionStringToString below.
+    static constexpr auto name = "top_level_domain"; // Same spelling as the scalar registered on the FE side.
+};
+using FunctionTopLevelDomain = // ExtractTopLevelDomain wrapped in ExtractSubstringImpl; registered in register_function_url().
+        FunctionStringToString<ExtractSubstringImpl<ExtractTopLevelDomain>, 
NameTopLevelDomain>;
+
+struct NameFirstSignificantSubdomain { // Name tag: passed as the second template argument of FunctionStringToString below.
+    static constexpr auto name = "first_significant_subdomain"; // Same spelling as the scalar registered on the FE side.
+};
+using FunctionFirstSignificantSubdomain = // ExtractFirstSignificantSubdomain wrapped in ExtractSubstringImpl; registered in register_function_url().
+        
FunctionStringToString<ExtractSubstringImpl<ExtractFirstSignificantSubdomain>,
+                               NameFirstSignificantSubdomain>;
+
+struct NameCutToFirstSignificantSubdomain { // Name tag: passed as the second template argument of FunctionStringToString below.
+    static constexpr auto name = "cut_to_first_significant_subdomain"; // Same spelling as the scalar registered on the FE side.
+};
+using FunctionCutToFirstSignificantSubdomain = // CutToFirstSignificantSubdomain is used through ExtractSubstringImpl (not CutSubstringImpl); registered in register_function_url().
+        
FunctionStringToString<ExtractSubstringImpl<CutToFirstSignificantSubdomain>,
+                               NameCutToFirstSignificantSubdomain>;
+
 void register_function_url(SimpleFunctionFactory& factory) {
     factory.register_function<FunctionDomain>();
     factory.register_function<FunctionDomainWithoutWWW>();
     factory.register_function<FunctionProtocol>();
+    factory.register_function<FunctionTopLevelDomain>(); // name tag: "top_level_domain"
+    factory.register_function<FunctionFirstSignificantSubdomain>(); // name tag: "first_significant_subdomain"
+    factory.register_function<FunctionCutToFirstSignificantSubdomain>(); // name tag: "cut_to_first_significant_subdomain"
 }
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/functions/url/functions_url.h 
b/be/src/vec/functions/url/functions_url.h
index f9f02a17a66..b6736496d24 100644
--- a/be/src/vec/functions/url/functions_url.h
+++ b/be/src/vec/functions/url/functions_url.h
@@ -89,7 +89,6 @@ struct ExtractSubstringImpl {
         for (size_t i = 0; i < size; ++i) {
             Extractor::execute(reinterpret_cast<const 
char*>(&data[prev_offset]),
                                offsets[i] - prev_offset, start, length);
-
             res_data.resize(res_data.size() + length);
             memcpy_small_allow_read_write_overflow15(&res_data[res_offset], 
start, length);
             res_offset += length;
@@ -105,11 +104,6 @@ struct ExtractSubstringImpl {
         Extractor::execute(data.data(), data.size(), start, length);
         res_data.assign(start, length);
     }
-
-    // static void vector_fixed(const ColumnString::Chars &, size_t, 
ColumnString::Chars &)
-    // {
-    //     throw Exception("Column of type FixedString is not supported by URL 
functions", ErrorCodes::ILLEGAL_COLUMN);
-    // }
 };
 
 /** Delete part of string using the Extractor.
@@ -155,11 +149,6 @@ struct CutSubstringImpl {
         res_data.append(data.data(), start);
         res_data.append(start + length, data.data() + data.size());
     }
-
-    // static void vector_fixed(const ColumnString::Chars &, size_t, 
ColumnString::Chars &)
-    // {
-    //     throw Exception("Column of type FixedString is not supported by URL 
functions", ErrorCodes::ILLEGAL_COLUMN);
-    // }
 };
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/functions/url/tldLookup.generated.cpp 
b/be/src/vec/functions/url/tldLookup.generated.cpp
new file mode 100644
index 00000000000..9b9471c094d
--- /dev/null
+++ b/be/src/vec/functions/url/tldLookup.generated.cpp
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/URL/tldLookup.generated.cpp
+// and modified by Doris
+
+// clang-format off
+/* C++ code produced by gperf version 3.1 */
+/* Command-line: /usr/bin/gperf --output-file=tldLookup.generated.cpp 
tldLookup.gperf  */
+/* Computed positions: -k'1-11,13-14,17,$' */
+
+#if !( \
+        (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) && ('%' == 
37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) && (')' == 41) && ('*' == 42) 
&& ('+' == 43) && (',' == 44) && ('-' == 45) && ('.' == 46) && ('/' == 47) && 
('0' == 48) && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) && ('5' 
== 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) && ('9' == 57) && (':' == 
58) && (';' == 59) && ('<' == 60) && ('=' == 61) && ('>' == 62) && ('?' == 63) 
&& ('A' == 65) && ('B [...]
+/* The character set is not based on ISO-646.  */
+#error "gperf generated tables don't work with this execution character set. 
Please report a bug to <bug-gp...@gnu.org>."
+#endif
+
+#line 7 "tldLookup.gperf"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
+#pragma GCC diagnostic ignored "-Wunused-macros"
+#include <cstring>
+
+#define TOTAL_KEYWORDS 5045
+#define MIN_WORD_LENGTH 4
+#define MAX_WORD_LENGTH 34
+#define MIN_HASH_VALUE 75
+#define MAX_HASH_VALUE 110600
+/* maximum key range = 110526, duplicates = 0 */
+
+class TopLevelDomainLookupHash { // gperf-generated lookup class; tldLookup.h carries the same class definition, so keep the two in sync.
+private:
+    static inline unsigned int hash(const char* str, size_t len); // Hash of the first `len` bytes of `str`; defined below in this file.
+
+public:
+    static const char* is_valid(const char* str, size_t len); // Returns the matching word-list entry, or nullptr when there is no match.
+};
+
+inline unsigned int TopLevelDomainLookupHash::hash(const char* str, size_t 
len) { // gperf-generated hash over the key positions listed in the "Computed positions" header; is_valid() checks the length range before calling it.
+    static const unsigned int asso_values[] = {110601, 110601, 110601, 110601, 
110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 
110601, 110601,
+                                               110601, 110601, 110601, 110601, 
110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 
110601, 110601,
+                                               110601, 110601, 110601, 110601, 
110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 3905, 
0, 5,
+                                               11617, 15312, 10, 5, 25, 0, 25, 
0, 5, 0, 0, 110601, 110601, 110601, 5, 110601,
+                                               110601, 110601, 110601, 110601, 
30, 20, 5, 15, 10, 65, 45, 80, 70, 55, 110601, 110601,
+                                               110601, 110601, 110601, 110601, 
110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 
110601, 110601,
+                                               110601, 2570, 9477, 1350, 15, 
130, 5915, 1830, 4360, 2210, 5405, 63, 3190, 20, 1165, 5,
+                                               6120, 5863, 470, 2315, 175, 0, 
815, 40, 13577, 115, 5680, 1030, 11798, 23179, 345, 1097,
+                                               28079, 13839, 245, 25674, 
31874, 75, 31774, 7351, 27474, 190, 16044, 8040, 50, 25, 35, 55,
+                                               0, 0, 30, 0, 10, 0, 0, 0, 35, 
0, 55, 10, 5, 65, 0, 60,
+                                               0, 25, 5, 30, 0, 5, 10, 0, 20, 
5, 5, 35, 5, 0, 0, 0,
+                                               0, 0, 15, 0, 5, 5, 0, 5, 5, 5, 
0, 0, 0, 0, 0, 15,
+                                               5, 110601, 110601, 5, 10, 45, 
5, 110601, 0, 110601, 110601, 110601, 110601, 110601, 110601, 110601,
+                                               0, 0, 0, 0, 110601, 110601, 
110601, 45, 0, 0, 0, 0, 110601, 110601, 110601, 110601,
+                                               0, 0, 110601, 0, 0, 0, 0, 5, 0, 
5, 30, 0, 0, 110601, 110601, 110601,
+                                               110601, 110601, 110601, 110601, 
0, 110601, 110601, 110601, 0, 0, 5, 0, 20, 40, 110601, 110601,
+                                               110601, 110601, 110601, 110601, 
110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 110601, 
110601, 110601,
+                                               110601, 110601, 110601, 110601}; // 110601 == MAX_HASH_VALUE + 1; is_valid() rejects any key whose hash exceeds MAX_HASH_VALUE.
+    unsigned int hval = len; // The key length is the starting value.
+
+    switch (hval) { // Cases fall through on purpose: a key adds one table value for every listed index below its length.
+    default: // len >= 17 (len == 0 would also land here, but is_valid() rejects it before calling).
+        hval += asso_values[static_cast<unsigned char>(str[16])];
+    /*FALLTHROUGH*/
+    case 16:
+    case 15:
+    case 14:
+        hval += asso_values[static_cast<unsigned char>(str[13] + 1)];
+    /*FALLTHROUGH*/
+    case 13:
+        hval += asso_values[static_cast<unsigned char>(str[12])];
+    /*FALLTHROUGH*/
+    case 12:
+    case 11:
+        hval += asso_values[static_cast<unsigned char>(str[10])];
+    /*FALLTHROUGH*/
+    case 10:
+        hval += asso_values[static_cast<unsigned char>(str[9])];
+    /*FALLTHROUGH*/
+    case 9:
+        hval += asso_values[static_cast<unsigned char>(str[8] + 1)];
+    /*FALLTHROUGH*/
+    case 8:
+        hval += asso_values[static_cast<unsigned char>(str[7])];
+    /*FALLTHROUGH*/
+    case 7:
+        hval += asso_values[static_cast<unsigned char>(str[6] + 3)];
+    /*FALLTHROUGH*/
+    case 6:
+        hval += asso_values[static_cast<unsigned char>(str[5])];
+    /*FALLTHROUGH*/
+    case 5:
+        hval += asso_values[static_cast<unsigned char>(str[4] + 2)];
+    /*FALLTHROUGH*/
+    case 4:
+        hval += asso_values[static_cast<unsigned char>(str[3] + 1)];
+    /*FALLTHROUGH*/
+    case 3:
+        hval += asso_values[static_cast<unsigned char>(str[2])];
+    /*FALLTHROUGH*/
+    case 2:
+        hval += asso_values[static_cast<unsigned char>(str[1])];
+    /*FALLTHROUGH*/
+    case 1:
+        hval += asso_values[static_cast<unsigned char>(str[0] + 20)];
+        break;
+    }
+    return hval + asso_values[static_cast<unsigned char>(str[len - 1])]; // The last character is always added (the '$' key position).
+}
+
+const char* TopLevelDomainLookupHash::is_valid(const char* str, size_t len) {
+    static const char* const wordlist[] = 
{"","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","co.tm","","\340\270\227\340\270\253\340\270\262\340\270\243.\340\271\204\340\270\227\340\270\242","","","","com.mu","","","","","com.so","","\340\270\243\340\270\261\340\270\220\340\270\232\340\270\262\340\270\245.\340\271\
 [...]
+    if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) {
+        unsigned int key = hash(str, len);
+
+        if (key <= MAX_HASH_VALUE) {
+            const char* s = wordlist[key];
+
+            if (*str == *s && !strncmp(str + 1, s + 1, len - 1) && s[len] == 
'\0')
+                return s;
+        }
+    }
+    return nullptr;
+}
+#line 5060 "tldLookup.gperf"
\ No newline at end of file
diff --git a/be/src/vec/functions/url/tldLookup.h 
b/be/src/vec/functions/url/tldLookup.h
new file mode 100644
index 00000000000..9be88890c14
--- /dev/null
+++ b/be/src/vec/functions/url/tldLookup.h
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// 
https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/URL/tldLookup.h
+// and modified by Doris
+
+#pragma once
+
+#include <cstdlib>
+
+// Declaration of the class that gperf generates from gperf/tldLookup.gperf; its members are defined in tldLookup.generated.cpp, which repeats this class definition.
+class TopLevelDomainLookupHash {
+private:
+    static inline unsigned int hash(const char* str, size_t len); // Internal hash; only defined inside tldLookup.generated.cpp.
+
+public:
+    static const char* is_valid(const char* str, size_t len); // Returns the matching word-list entry, or nullptr when there is no match.
+};
+
+using tldLookup = TopLevelDomainLookupHash; // Short alias for the generated class name.
\ No newline at end of file
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
index f84bda52178..b09ea1033b5 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java
@@ -77,6 +77,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.BitCount;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.BitLength;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.BitShiftLeft;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.BitShiftRight;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.BitTest;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapAnd;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapAndCount;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapAndNot;
@@ -122,6 +123,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Cos;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Cosh;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CosineDistance;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CountEqual;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CountSubstring;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Crc32;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CreateMap;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CreateNamedStruct;
@@ -131,6 +133,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CurrentDate;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CurrentTime;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CurrentUser;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CutIpv6;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CutToFirstSignificantSubdomain;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Database;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Date;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.DateDiff;
@@ -167,6 +170,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Exp;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.ExtractUrlParameter;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Field;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.FindInSet;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.FirstSignificantSubdomain;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Floor;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Fmod;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Fpow;
@@ -422,6 +426,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.ToIso8601;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.ToMonday;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.ToQuantileState;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Tokenize;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.TopLevelDomain;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Translate;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Trim;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Truncate;
@@ -554,6 +559,7 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(BitmapXorCount.class, "bitmap_xor_count"),
             scalar(BitShiftLeft.class, "bit_shift_left"),
             scalar(BitShiftRight.class, "bit_shift_right"),
+            scalar(BitTest.class, "bit_test", "bit_test_all"),
             scalar(Cardinality.class, "array_size", "cardinality", "size"),
             scalar(Cbrt.class, "cbrt"),
             scalar(Ceil.class, "ceil", "ceiling"),
@@ -570,6 +576,7 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(Cosh.class, "cosh"),
             scalar(CosineDistance.class, "cosine_distance"),
             scalar(CountEqual.class, "countequal"),
+            scalar(CountSubstring.class, "count_substrings"),
             scalar(CreateMap.class, "map"),
             scalar(CreateStruct.class, "struct"),
             scalar(CreateNamedStruct.class, "named_struct"),
@@ -578,6 +585,7 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(CurrentTime.class, "curtime", "current_time"),
             scalar(CurrentUser.class, "current_user"),
             scalar(CutIpv6.class, "cut_ipv6"),
+            scalar(CutToFirstSignificantSubdomain.class, 
"cut_to_first_significant_subdomain"),
             scalar(Database.class, "database", "schema"),
             scalar(Date.class, "date"),
             scalar(DateDiff.class, "datediff"),
@@ -614,6 +622,7 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(ExtractUrlParameter.class, "extract_url_parameter"),
             scalar(Field.class, "field"),
             scalar(FindInSet.class, "find_in_set"),
+            scalar(FirstSignificantSubdomain.class, 
"first_significant_subdomain"),
             scalar(Floor.class, "floor"),
             scalar(Fmod.class, "fmod"),
             scalar(Fpow.class, "fpow"),
@@ -889,6 +898,7 @@ public class BuiltinScalarFunctions implements 
FunctionHelper {
             scalar(ToIso8601.class, "to_iso8601"),
             scalar(Tokenize.class, "tokenize"),
             scalar(ToMonday.class, "to_monday"),
+            scalar(TopLevelDomain.class, "top_level_domain"),
             scalar(ToQuantileState.class, "to_quantile_state"),
             scalar(Translate.class, "translate"),
             scalar(Trim.class, "trim"),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitTest.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitTest.java
new file mode 100644
index 00000000000..5c32005c126
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitTest.java
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.BigIntType;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.LargeIntType;
+import org.apache.doris.nereids.types.SmallIntType;
+import org.apache.doris.nereids.types.TinyIntType;
+import org.apache.doris.nereids.util.ExpressionUtils;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'bit_test'.
+ *
+ * <p>Accepts two or more arguments. Every entry of {@link #SIGNATURES} is variadic, declares
+ * both leading arguments with the same integer type (TINYINT, SMALLINT, INT, LARGEINT or
+ * BIGINT) and returns TINYINT.
+ *
+ * <p>NOTE(review): the class is tagged {@code UnaryExpression} although the constructor and
+ * {@link #withChildren} both require at least two children; confirm this is intended.
+ */
+
+public class BitTest extends ScalarFunction
+        implements UnaryExpression, ExplicitlyCastableSignature, 
PropagateNullable {
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            
FunctionSignature.ret(TinyIntType.INSTANCE).varArgs(TinyIntType.INSTANCE, 
TinyIntType.INSTANCE),
+            
FunctionSignature.ret(TinyIntType.INSTANCE).varArgs(SmallIntType.INSTANCE, 
SmallIntType.INSTANCE),
+            
FunctionSignature.ret(TinyIntType.INSTANCE).varArgs(IntegerType.INSTANCE, 
IntegerType.INSTANCE),
+            
FunctionSignature.ret(TinyIntType.INSTANCE).varArgs(LargeIntType.INSTANCE, 
LargeIntType.INSTANCE),
+            
FunctionSignature.ret(TinyIntType.INSTANCE).varArgs(BigIntType.INSTANCE, 
BigIntType.INSTANCE));
+
+    /**
+     * constructor with 2 or more arguments.
+     *
+     * @param arg0 first argument
+     * @param arg1 second argument
+     * @param varArgs optional further arguments; combined with arg0 and arg1 through
+     *                ExpressionUtils.mergeArguments before being handed to the super class
+     */
+    public BitTest(Expression arg0, Expression arg1, Expression... varArgs) {
+        super("bit_test", ExpressionUtils.mergeArguments(arg0, arg1, varArgs));
+    }
+
+    /**
+     * withChildren: builds a new BitTest from the given child list.
+     * The first two children become arg0 and arg1, all remaining children are passed as
+     * varArgs. The Preconditions check rejects lists with fewer than two children.
+     */
+    @Override
+    public BitTest withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() >= 2);
+        return new BitTest(children.get(0), children.get(1),
+                children.subList(2, children.size()).toArray(new 
Expression[0]));
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES; // the shared static list declared above
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitBitTest(this, context); // route to the BitTest-specific visitor hook
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CountSubstring.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CountSubstring.java
new file mode 100644
index 00000000000..ce7a43cf94b
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CountSubstring.java
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.StringType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'count_substrings'.
+ *
+ * <p>Takes exactly two STRING arguments and is declared to return INT (see
+ * {@link #SIGNATURES}). This class only describes the function to the planner; the counting
+ * itself is not implemented here.
+ * NOTE(review): presumably arg0 is the text to search and arg1 the substring to count;
+ * confirm against the backend implementation.
+ */
+public class CountSubstring extends ScalarFunction
+        implements BinaryExpression, ExplicitlyCastableSignature, 
PropagateNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            FunctionSignature.ret(IntegerType.INSTANCE)
+                    .args(StringType.INSTANCE, StringType.INSTANCE)
+    );
+
+    /**
+     * constructor with 2 arguments.
+     *
+     * @param arg0 first argument, passed to the super class as the first child
+     * @param arg1 second argument, passed to the super class as the second child
+     */
+    public CountSubstring(Expression arg0, Expression arg1) {
+        super("count_substrings", arg0, arg1);
+    }
+
+    /**
+     * withChildren: builds a new CountSubstring from the given child list.
+     * The Preconditions check requires exactly two children.
+     */
+    @Override
+    public CountSubstring withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 2);
+        return new CountSubstring(children.get(0), children.get(1));
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitCountSubstring(this, context); // route to the CountSubstring visitor hook
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES; // the shared static list declared above
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CutToFirstSignificantSubdomain.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CutToFirstSignificantSubdomain.java
new file mode 100644
index 00000000000..a2e77531e43
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CutToFirstSignificantSubdomain.java
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.StringType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'cut_to_first_significant_subdomain': one STRING argument, STRING result
+ * (see {@link #SIGNATURES}). This class is generated by GenerateFunction.
+ * This class only describes the function to the planner; the URL handling is not implemented here.
+ */
+public class CutToFirstSignificantSubdomain extends ScalarFunction
+        implements UnaryExpression, ExplicitlyCastableSignature, 
PropagateNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE)
+    );
+
+    /**
+     * constructor with 1 argument.
+     *
+     * @param arg the single argument, passed to the super class as the only child
+     */
+    public CutToFirstSignificantSubdomain(Expression arg) {
+        super("cut_to_first_significant_subdomain", arg);
+    }
+
+    /**
+     * withChildren: builds a new instance from the given child list.
+     * The Preconditions check requires exactly one child.
+     */
+    @Override
+    public CutToFirstSignificantSubdomain withChildren(List<Expression> 
children) {
+        Preconditions.checkArgument(children.size() == 1);
+        return new CutToFirstSignificantSubdomain(children.get(0));
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitCutToFirstSignificantSubdomain(this, context); // function-specific visitor hook
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES; // the shared static list declared above
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/FirstSignificantSubdomain.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/FirstSignificantSubdomain.java
new file mode 100644
index 00000000000..1af4dd96e6d
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/FirstSignificantSubdomain.java
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.StringType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'first_significant_subdomain': one STRING argument, STRING result
+ * (see {@link #SIGNATURES}). This class is generated by GenerateFunction.
+ * This class only describes the function to the planner; the URL handling is not implemented here.
+ */
+public class FirstSignificantSubdomain extends ScalarFunction
+        implements UnaryExpression, ExplicitlyCastableSignature, 
PropagateNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE)
+    );
+
+    /**
+     * constructor with 1 argument.
+     *
+     * @param arg the single argument, passed to the super class as the only child
+     */
+    public FirstSignificantSubdomain(Expression arg) {
+        super("first_significant_subdomain", arg);
+    }
+
+    /**
+     * withChildren: builds a new instance from the given child list.
+     * The Preconditions check requires exactly one child.
+     */
+    @Override
+    public FirstSignificantSubdomain withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 1);
+        return new FirstSignificantSubdomain(children.get(0));
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitFirstSignificantSubdomain(this, context); // function-specific visitor hook
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES; // the shared static list declared above
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TopLevelDomain.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TopLevelDomain.java
new file mode 100644
index 00000000000..05997659a2e
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/TopLevelDomain.java
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.StringType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * ScalarFunction 'top_level_domain': one STRING argument, STRING result
+ * (see {@link #SIGNATURES}). This class is generated by GenerateFunction.
+ * This class only describes the function to the planner; the URL handling is not implemented here.
+ */
+public class TopLevelDomain extends ScalarFunction
+        implements UnaryExpression, ExplicitlyCastableSignature, 
PropagateNullable {
+
+    public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+            
FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE)
+    );
+
+    /**
+     * constructor with 1 argument.
+     *
+     * @param arg the single argument, passed to the super class as the only child
+     */
+    public TopLevelDomain(Expression arg) {
+        super("top_level_domain", arg);
+    }
+
+    /**
+     * withChildren: builds a new instance from the given child list.
+     * The Preconditions check requires exactly one child.
+     */
+    @Override
+    public TopLevelDomain withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 1);
+        return new TopLevelDomain(children.get(0));
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitTopLevelDomain(this, context); // function-specific visitor hook
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        return SIGNATURES; // the shared static list declared above
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index 79b8452e1df..0192151ad78 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -84,6 +84,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.BitCount;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.BitLength;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.BitShiftLeft;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.BitShiftRight;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.BitTest;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapAnd;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapAndCount;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.BitmapAndNot;
@@ -129,6 +130,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Cos;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Cosh;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CosineDistance;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CountEqual;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CountSubstring;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Crc32;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CreateMap;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CreateNamedStruct;
@@ -138,6 +140,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CurrentDate;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CurrentTime;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CurrentUser;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.CutIpv6;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.CutToFirstSignificantSubdomain;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Database;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Date;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.DateDiff;
@@ -175,6 +178,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Exp;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.ExtractUrlParameter;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Field;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.FindInSet;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.FirstSignificantSubdomain;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Floor;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Fmod;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Fpow;
@@ -419,6 +423,7 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.ToIso8601;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.ToMonday;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.ToQuantileState;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Tokenize;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.TopLevelDomain;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Translate;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Trim;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Truncate;
@@ -827,6 +832,10 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(bitShiftRight, context);
     }
 
+    default R visitBitTest(BitTest bitTest, C context) {
+        return visitScalarFunction(bitTest, context); // default: delegate to the generic scalar-function hook
+    }
+
     default R visitCardinality(Cardinality cardinality, C context) {
         return visitScalarFunction(cardinality, context);
     }
@@ -855,6 +864,11 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(charFunc, context);
     }
 
+    default R 
visitCutToFirstSignificantSubdomain(CutToFirstSignificantSubdomain 
cutToFirstSignificantSubdomain,
+            C context) {
+        return visitScalarFunction(cutToFirstSignificantSubdomain, context); // default: generic hook
+    }
+
     default R visitConcatWs(ConcatWs concatWs, C context) {
         return visitScalarFunction(concatWs, context);
     }
@@ -891,6 +905,10 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(countequal, context);
     }
 
+    default R visitCountSubstring(CountSubstring countSubstring, C context) {
+        return visitScalarFunction(countSubstring, context); // default: delegate to the generic scalar-function hook
+    }
+
     default R visitCurrentCatalog(CurrentCatalog currentCatalog, C context) {
         return visitScalarFunction(currentCatalog, context);
     }
@@ -1115,6 +1133,10 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(findInSet, context);
     }
 
+    default R visitFirstSignificantSubdomain(FirstSignificantSubdomain 
firstSignificantSubdomain, C context) {
+        return visitScalarFunction(firstSignificantSubdomain, context); // default: generic hook
+    }
+
     default R visitFloor(Floor floor, C context) {
         return visitScalarFunction(floor, context);
     }
@@ -2023,6 +2045,10 @@ public interface ScalarFunctionVisitor<R, C> {
         return visitScalarFunction(tokenize, context);
     }
 
+    default R visitTopLevelDomain(TopLevelDomain topLevelDomain, C context) {
+        return visitScalarFunction(topLevelDomain, context); // default: delegate to the generic scalar-function hook
+    }
+
     default R visitToQuantileState(ToQuantileState toQuantileState, C context) 
{
         return visitScalarFunction(toQuantileState, context);
     }
diff --git a/gensrc/script/doris_builtins_functions.py 
b/gensrc/script/doris_builtins_functions.py
index a05f6ac8abb..0da5f697100 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -77,7 +77,12 @@ visible_functions = {
         [['bitnot'], 'LARGEINT', ['LARGEINT'], ''],
         
         [['bit_shift_left'],    'BIGINT',   ['BIGINT',  'TINYINT'],     ''],
-        [['bit_shift_right'],   'BIGINT',   ['BIGINT',  'TINYINT'],     '']
+        [['bit_shift_right'],   'BIGINT',   ['BIGINT',  'TINYINT'],     ''],
+        [['bit_test','bit_test_all'], 'TINYINT', ['TINYINT','TINYINT','...'], 
''],
+        [['bit_test','bit_test_all'], 'TINYINT', 
['SMALLINT','SMALLINT','...'], ''],
+        [['bit_test','bit_test_all'], 'TINYINT', ['INT','INT','...'], ''],
+        [['bit_test','bit_test_all'], 'TINYINT', ['BIGINT','BIGINT','...'], 
''],
+        [['bit_test','bit_test_all'], 'TINYINT', 
['LARGEINT','LARGEINT','...'], '']
     ],
 
     # map functions
@@ -1625,7 +1630,7 @@ visible_functions = {
 
         [['char'], 'VARCHAR', ['VARCHAR', 'INT', '...'], 'ALWAYS_NULLABLE'],
         [['strcmp'], 'INT', ['VARCHAR', 'VARCHAR'], 'DEPEND_ON_ARGUMENT'],
-
+        [['count_substrings'], 'INT', ['STRING', 'STRING'], 
'DEPEND_ON_ARGUMENT'],
         [['substr', 'substring'], 'STRING', ['STRING', 'INT'], 
'DEPEND_ON_ARGUMENT'],
         [['substr', 'substring'], 'STRING', ['STRING', 'INT', 'INT'], 
'DEPEND_ON_ARGUMENT'],
         [['strleft', 'left'], 'STRING', ['STRING', 'INT'], 
'DEPEND_ON_ARGUMENT'],
@@ -2019,7 +2024,10 @@ visible_functions = {
     "Url": [
         [['domain'], 'STRING', ['STRING'], ''],
         [['domain_without_www'], 'STRING', ['STRING'], ''],
-        [['protocol'], 'STRING', ['STRING'], '']
+        [['protocol'], 'STRING', ['STRING'], ''],
+        [['top_level_domain'], 'STRING', ['STRING'], ''],
+        [['cut_to_first_significant_subdomain'], 'STRING', ['STRING'], ''],
+        [['first_significant_subdomain'], 'STRING', ['STRING'], '']
     ],
 
     # search functions
diff --git a/regression-test/data/correctness_p0/test_bit_test_function.out 
b/regression-test/data/correctness_p0/test_bit_test_function.out
new file mode 100644
index 00000000000..365f4d95921
--- /dev/null
+++ b/regression-test/data/correctness_p0/test_bit_test_function.out
@@ -0,0 +1,191 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !bit_test1 --
+0      0
+1      1
+2      0
+3      1
+4      0
+5      1
+6      0
+7      1
+8      0
+9      1
+
+-- !bit_test2 --
+0      0
+1      1
+2      0
+3      1
+4      0
+5      1
+6      0
+7      1
+8      0
+9      1
+
+-- !bit_test3 --
+0      0
+1      0
+2      1
+3      1
+4      0
+5      0
+6      1
+7      1
+8      0
+9      0
+
+-- !bit_test4 --
+0      0
+1      0
+2      0
+3      1
+4      0
+5      0
+6      0
+7      1
+8      0
+9      0
+
+-- !bit_test5 --
+1
+
+-- !bit_test6 --
+1
+
+-- !bit_test7 --
+1
+
+-- !bit_test8 --
+1
+
+-- !bit_test9 --
+1
+
+-- !bit_test10 --
+0
+
+-- !bit_test11 --
+0
+
+-- !bit_test12 --
+0
+
+-- !bit_test13 --
+0
+
+-- !bit_test14 --
+1
+
+-- !bit_test_TINYINT_MAX --
+1
+
+-- !bit_test_TINYINT_MIN --
+0
+
+-- !bit_test_SMALLINT_MAX --
+1
+
+-- !bit_test_SMALLINT_MIN --
+0
+
+-- !bit_test_INT_MAX --
+1
+
+-- !bit_test_INT_MIN --
+0
+
+-- !bit_test_INT64_MAX --
+1
+
+-- !bit_test_INT64_MIN --
+0
+
+-- !bit_test_INT128_MAX --
+1      1
+
+-- !bit_test_INT128_MIN --
+0      0
+
+-- !select1_const --
+\N
+
+-- !select2_const --
+\N
+
+-- !select3_const --
+\N
+
+-- !select4_const --
+\N
+
+-- !select1_null_null --
+1      1       1       0
+2      \N      1       \N
+3      \N      \N      \N
+4      \N      \N      \N
+5      \N      \N      \N
+
+-- !select2_null_not_null --
+1      1       1       0
+2      \N      1       \N
+3      \N      1       \N
+4      \N      1       \N
+5      \N      2147483647      \N
+
+-- !select3_not_null_not_null --
+1      1       1       0
+2      1       1       0
+3      1       1       0
+4      2147483647      1       1
+5      2147483647      2147483647      0
+
+-- !select4_not_null_null --
+1      1       1       0
+2      1       1       0
+3      1       \N      \N
+4      2147483647      \N      \N
+5      2147483647      \N      \N
+
+-- !select5_null_const --
+1      1       1       0
+2      \N      1       \N
+3      \N      1       \N
+4      \N      1       \N
+5      \N      1       \N
+
+-- !select6_not_null_const --
+1      1       1       0
+2      1       1       0
+3      1       1       0
+4      2147483647      1       1
+5      2147483647      1       1
+
+-- !select7_const_null --
+1      6       1       1
+2      6       1       1
+3      6       \N      \N
+4      6       \N      \N
+5      6       \N      \N
+
+-- !select7_const_not_null --
+1      6       1       1
+2      6       1       1
+3      6       1       1
+4      6       1       1
+5      6       2147483647      0
+
+-- !select7_null_null --
+1      1       1       0
+2      \N      1       \N
+3      \N      \N      \N
+4      \N      \N      \N
+5      \N      \N      \N
+
+-- !select7_not_null_not_null --
+1      1       1       0
+2      1       1       0
+3      1       1       0
+4      2147483647      1       1
+5      2147483647      2147483647      0
+
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_count_substrings.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_count_substrings.out
new file mode 100644
index 00000000000..9bee1363c66
--- /dev/null
+++ 
b/regression-test/data/query_p0/sql_functions/string_functions/test_count_substrings.out
@@ -0,0 +1,147 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select1 --
+\N
+
+-- !select2 --
+\N
+
+-- !select3 --
+\N
+
+-- !select4 --
+2
+
+-- !select5 --
+6
+
+-- !select6 --
+1
+
+-- !select4_empty --
+
+-- !select5_empty --
+
+-- !select6_empty --
+
+-- !select7_empty --
+
+-- !select5_null_null --
+abcde          0
+               0
+       a       0
+\N     \N      \N
+asdasd a       2
+a1b1c1d        1       3
+,,,    #       0
+a,b,c  v       0
+a,b,c, \N      \N
+\N     asd     \N
+a,b,c,12345    5       1
+a,b,c,12345    a       1
+a,你,你,1我2你4我5  你       3
+
+-- !select6_null_not --
+abcde          0
+               0
+       a       0
+\N             \N
+asdasd a       2
+a1b1c1d        1       3
+,,,    #       0
+a,b,c  v       0
+a,b,c,         0
+\N     asd     \N
+a,b,c,12345    5       1
+a,b,c,12345    a       1
+a,你,你,1我2你4我5  我       2
+
+-- !select7_not_null --
+abcde          0
+               0
+       a       0
+       \N      \N
+asdasd a       2
+a1b1c1d        1       3
+,,,    #       0
+a,b,c  v       0
+a,b,c  \N      \N
+       asd     0
+a,b,c,12345    5       1
+a,b,c,12345    a       1
+a你,你,1我2你4我5   你       3
+
+-- !select8_not_not --
+abcde          0
+               0
+       a       0
+               0
+asdasd a       2
+a1b1c1d        1       3
+,,,    #       0
+a,b,c  v       0
+a,b,c          0
+       asd     0
+a,b,c,12345    5       1
+a,b,c,12345    a       1
+a你,你,1我2你4我5   我       2
+
+-- !select9_null_const --
+abcde  a       1
+       a       0
+       a       0
+\N     a       \N
+asdasd a       2
+a1b1c1d        a       1
+,,,    a       0
+a,b,c  a       1
+a,b,c, a       1
+\N     a       \N
+a,b,c,12345    a       1
+a,b,c,12345    a       1
+a,你,你,1我2你4我5  a       1
+
+-- !select10_not_null_const --
+abcde  a       1
+       a       0
+       a       0
+       a       0
+asdasd a       2
+a1b1c1d        a       1
+,,,    a       0
+a,b,c  a       1
+a,b,c  a       1
+       a       0
+a,b,c,12345    a       1
+a,b,c,12345    a       1
+a你,你,1我2你4我5   a       1
+
+-- !select11_const_null --
+a              0
+a              0
+a      a       1
+a      \N      \N
+a      a       1
+a      1       0
+a      #       0
+a      v       0
+a      \N      \N
+a      asd     0
+a      5       0
+a      a       1
+a      你       0
+
+-- !select12_const_not_null --
+a              0
+a              0
+a      a       1
+a              0
+a      a       1
+a      1       0
+a      #       0
+a      v       0
+a              0
+a      asd     0
+a      5       0
+a      a       1
+a      我       0
+
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_url_functions.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_url_functions.out
new file mode 100644
index 00000000000..ce1ef717975
--- /dev/null
+++ 
b/regression-test/data/query_p0/sql_functions/string_functions/test_url_functions.out
@@ -0,0 +1,121 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !empty_nullable1 --
+
+-- !empty_nullable2 --
+
+-- !empty_nullable3 --
+
+-- !empty_not_nullable1 --
+
+-- !empty_not_nullable2 --
+
+-- !empty_not_nullable3 --
+
+-- !empty_null1 --
+\N
+
+-- !empty_null2 --
+\N
+
+-- !empty_null3 --
+\N
+
+-- !empty_const1 --
+com
+
+-- !empty_const2 --
+baidu
+
+-- !empty_const3 --
+baidu.com
+
+-- !empty_const4 --
+cn
+
+-- !empty_const5 --
+google
+
+-- !empty_const6 --
+google.com.cn
+
+-- !empty_const7 --
+
+
+-- !empty_const8 --
+
+
+-- !empty_const9 --
+
+
+-- !nullable1 --
+1      www.baidu.com   com
+10     https://news.clickhouse.com.tr/ tr
+2      www.google.com.cn       cn
+3      invalid url     
+4              
+5              
+6      \N      \N
+7      xxxxxxxx        
+8      http://www.example.com/a/b/c?a=b        com
+9      https://news.clickhouse.com/    com
+
+-- !nullable2 --
+1      www.baidu.com   baidu
+10     https://news.clickhouse.com.tr/ clickhouse
+2      www.google.com.cn       google
+3      invalid url     
+4              
+5              
+6      \N      \N
+7      xxxxxxxx        
+8      http://www.example.com/a/b/c?a=b        example
+9      https://news.clickhouse.com/    clickhouse
+
+-- !nullable3 --
+1      www.baidu.com   baidu.com
+10     https://news.clickhouse.com.tr/ clickhouse.com.tr
+2      www.google.com.cn       google.com.cn
+3      invalid url     
+4              
+5              
+6      \N      \N
+7      xxxxxxxx        
+8      http://www.example.com/a/b/c?a=b        example.com
+9      https://news.clickhouse.com/    clickhouse.com
+
+-- !not_nullable1 --
+1      www.baidu.com   com
+10     https://news.clickhouse.com.tr/ tr
+2      www.google.com.cn       cn
+3      invalid url     
+4              
+5              
+6              
+7      xxxxxxxx        
+8      http://www.example.com/a/b/c?a=b        com
+9      https://news.clickhouse.com/    com
+
+-- !not_nullable2 --
+1      www.baidu.com   baidu
+10     https://news.clickhouse.com.tr/ clickhouse
+2      www.google.com.cn       google
+3      invalid url     
+4              
+5              
+6              
+7      xxxxxxxx        
+8      http://www.example.com/a/b/c?a=b        example
+9      https://news.clickhouse.com/    clickhouse
+
+-- !not_nullable3 --
+1      www.baidu.com   baidu.com
+10     https://news.clickhouse.com.tr/ clickhouse.com.tr
+2      www.google.com.cn       google.com.cn
+3      invalid url     
+4              
+5              
+6              
+7      xxxxxxxx        
+8      http://www.example.com/a/b/c?a=b        example.com
+9      https://news.clickhouse.com/    clickhouse.com
+
diff --git 
a/regression-test/suites/correctness_p0/test_bit_test_function.groovy 
b/regression-test/suites/correctness_p0/test_bit_test_function.groovy
new file mode 100644
index 00000000000..6d2ab6da3a2
--- /dev/null
+++ b/regression-test/suites/correctness_p0/test_bit_test_function.groovy
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_bit_test_function") {
+    qt_bit_test1 'select number,bit_test(number,0) from numbers("number"="10") 
order by 1;'
+    qt_bit_test2 'select number,bit_test_all(number,0) from 
numbers("number"="10") order by 1;'
+    qt_bit_test3 'select number,bit_test_all(number,1) from 
numbers("number"="10") order by 1;'
+    qt_bit_test4 'select number,bit_test(number,0,1) from 
numbers("number"="10") order by 1;'
+    qt_bit_test5 'select bit_test(cast (-1 as tinyint),0);'    
+    qt_bit_test6 'select bit_test(cast (-1 as smallint),1);'   
+    qt_bit_test7 'select bit_test(cast (-1 as int),2);'        
+    qt_bit_test8 'select bit_test(cast (-1 as bigint),3);'    
+    qt_bit_test9 'select bit_test(cast (-1 as largeint),4);'
+    qt_bit_test10 'select bit_test(10,-1);'
+    qt_bit_test11 'select bit_test(100,-2);'
+    qt_bit_test12 'select bit_test(100,1000);'
+    qt_bit_test13 'select bit_test(-43,1);'
+    qt_bit_test14 'select bit_test(-43,2);'
+    qt_bit_test_TINYINT_MAX 'select bit_test(cast (127 as tinyint),2);'        
          // TINYINT_MAX
+    qt_bit_test_TINYINT_MIN 'select bit_test(cast (-128 as tinyint),4);'       
          // TINYINT_MIN
+    qt_bit_test_SMALLINT_MAX 'select bit_test(cast (32767 as smallint),5);'    
          // SMALLINT_MAX
+    qt_bit_test_SMALLINT_MIN 'select bit_test(cast (-32768 as smallint),10);'  
          // SMALLINT_MIN
+    qt_bit_test_INT_MAX 'select bit_test(cast (2147483647 as int),12);'        
          // INT_MAX
+    qt_bit_test_INT_MIN 'select bit_test(cast (-2147483648 as int),11);'       
           // INT_MIN
+    qt_bit_test_INT64_MAX 'select bit_test(cast (9223372036854775807 as 
bigint),12);'    // INT64_MAX
+    qt_bit_test_INT64_MIN 'select bit_test(cast (-9223372036854775808 as 
bigint),12);'   // INT64_MIN
+    // INT128_MAX
+    qt_bit_test_INT128_MAX """
+        select bit_test(170141183460469231731687303715884105727,13),
+               bit_test(cast (170141183460469231731687303715884105727 as 
largeint),13);
+    """
+    // INT128_MIN
+    qt_bit_test_INT128_MIN """
+        select  bit_test(-170141183460469231731687303715884105728,11),
+                bit_test(cast (-170141183460469231731687303715884105728 as 
largeint),11);
+    """
+    // NULL
+    qt_select1_const "select bit_test(NULL,1);"
+    qt_select2_const "select bit_test(1,NULL);"
+    qt_select3_const "select bit_test(NULL,NULL);"
+    qt_select4_const "select bit_test(111,1,2,3,NULL);"
+
+   sql """DROP TABLE IF EXISTS test_bit_test"""
+   sql """ 
+             CREATE TABLE IF NOT EXISTS test_bit_test (
+               `k1` int(11) NULL COMMENT "",
+               `s1` int(20) NULL COMMENT "",
+               `s2` int(20) NOT NULL COMMENT "",
+               `p1` int(20) NULL COMMENT "",
+               `p2` int(20) NOT NULL COMMENT ""
+             ) ENGINE=OLAP
+             DUPLICATE KEY(`k1`)
+             DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+             PROPERTIES (
+             "replication_allocation" = "tag.location.default: 1",
+             "storage_format" = "V2"
+             )
+    """
+    sql """ INSERT INTO test_bit_test VALUES(1, 1, 1, 1, 1); """
+    sql """ INSERT INTO test_bit_test VALUES(2, NULL, 1, 1, 1); """
+    sql """ INSERT INTO test_bit_test VALUES(3, NULL, 1, NULL, 1); """
+    sql """ INSERT INTO test_bit_test VALUES(4, NULL, 2147483647, NULL, 1); """
+    sql """ INSERT INTO test_bit_test VALUES(5, NULL, 2147483647, NULL, 
2147483647); """
+
+    
+    // combinations of nullable and NOT NULL columns (and constants) as arguments
+    qt_select1_null_null "select k1,s1,p1,bit_test(s1, p1) from test_bit_test 
order by k1;"
+    qt_select2_null_not_null "select k1,s1,p2,bit_test(s1, p2) from 
test_bit_test order by k1;"
+    qt_select3_not_null_not_null "select k1,s2,p2,bit_test(s2, p2) from 
test_bit_test order by k1;"
+    qt_select4_not_null_null "select k1,s2,p1,bit_test(s2, p1) from 
test_bit_test order by k1;"
+    qt_select5_null_const "select k1,s1,1,bit_test(s1, 1) from test_bit_test 
order by k1;"
+    qt_select6_not_null_const "select k1,s2,1,bit_test(s2, 1) from 
test_bit_test order by k1;"
+    qt_select7_const_null "select k1,6,p1,bit_test(6, p1) from test_bit_test 
order by k1;"
+    qt_select7_const_not_null "select k1,6,p2,bit_test(6, p2) from 
test_bit_test order by k1;"
+    qt_select7_null_null "select k1,s1,p1,bit_test(s1, p1,1,2,3) from 
test_bit_test order by k1;"
+    qt_select7_not_null_not_null "select k1,s2,p2,bit_test(s2, p2,1,2,3) from 
test_bit_test order by k1;"
+}
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_count_substrings.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_count_substrings.groovy
new file mode 100644
index 00000000000..64051ec7afc
--- /dev/null
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_count_substrings.groovy
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_count_substrings") {
+    // const / NULL
+    qt_select1 "select count_substrings(NULL,NULL);"
+    qt_select2 "select count_substrings('a12bc23de345f',NULL);"
+    qt_select3 "select count_substrings(NULL, 'a12bc23de345f');"
+    qt_select4 "select count_substrings('a12bc23de345f','2');"
+    qt_select5 "select count_substrings('a1你你c我你3我d你3你5你','你');"
+    qt_select6 "select count_substrings('ccc','cc');"
+
+    sql """DROP TABLE IF EXISTS test_count_substrings"""
+    sql """ 
+            CREATE TABLE IF NOT EXISTS test_count_substrings (
+              `k1` int(11) NULL COMMENT "",
+              `s1` varchar(30) NULL COMMENT "",
+              `s2` varchar(30) NOT NULL COMMENT "",
+              `p1` varchar(30) NULL COMMENT "",
+              `p2` varchar(30) NOT NULL COMMENT ""
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`k1`)
+            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+            )
+        """
+    // empty table: these queries run before any rows are inserted
+    qt_select4_empty "select count_substrings(s1,p1) from 
test_count_substrings;"
+    qt_select5_empty "select count_substrings(s2,p2) from 
test_count_substrings;"
+    qt_select6_empty "select count_substrings(s1,p2) from 
test_count_substrings;"
+    qt_select7_empty "select count_substrings(s2,p1) from 
test_count_substrings;"
+
+    // some normal/special/null value
+    sql """ INSERT INTO test_count_substrings VALUES(1, 'abcde', 'abcde', '', 
'') """
+    sql """ INSERT INTO test_count_substrings VALUES(2, '', '', '', '') """
+    sql """ INSERT INTO test_count_substrings VALUES(3, '', '','a','a') """
+    sql """ INSERT INTO test_count_substrings VALUES(4, NULL, '', NULL,'') """
+    sql """ INSERT INTO test_count_substrings VALUES(5, 'asdasd', 
'asdasd','a','a') """
+    sql """ INSERT INTO test_count_substrings VALUES(6, 'a1b1c1d', 
'a1b1c1d','1','1') """
+    sql """ INSERT INTO test_count_substrings VALUES(7, ',,,', ',,,','#','#') 
"""
+    sql """ INSERT INTO test_count_substrings VALUES(8, 'a,b,c', 
'a,b,c','v','v') """
+    sql """ INSERT INTO test_count_substrings VALUES(9, 'a,b,c,', 
'a,b,c',NULL,'') """
+    sql """ INSERT INTO test_count_substrings VALUES(10, NULL, '','asd','asd') 
"""
+    sql """ INSERT INTO test_count_substrings VALUES(11, 'a,b,c,12345', 
'a,b,c,12345','5','5') """
+    sql """ INSERT INTO test_count_substrings VALUES(12, 'a,b,c,12345', 
'a,b,c,12345','a','a') """
+    sql """ INSERT INTO test_count_substrings VALUES(13, 'a,你,你,1我2你4我5', 
'a你,你,1我2你4我5','你','我') """
+
+    // combinations of nullable and NOT NULL columns as arguments
+    qt_select5_null_null "select s1,p1,count_substrings(s1, p1) from 
test_count_substrings order by k1;"
+    qt_select6_null_not "select s1, p2,count_substrings(s1, p2) from 
test_count_substrings order by k1;"
+    qt_select7_not_null "select s2, p1,count_substrings(s2, p1) from 
test_count_substrings order by k1;"
+    qt_select8_not_not "select s2, p2,count_substrings(s2, p2) from 
test_count_substrings order by k1;"
+
+    // combinations of a column (nullable or NOT NULL) with a constant argument
+    qt_select9_null_const "select s1, 'a',count_substrings(s1, 'a') from 
test_count_substrings order by k1;"
+    qt_select10_not_null_const "select s2, 'a',count_substrings(s2, 'a') from 
test_count_substrings order by k1;"
+    qt_select11_const_null "select 'a',p1,count_substrings('a', p1) from 
test_count_substrings order by k1;"
+    qt_select12_const_not_null "select 'a',p2,count_substrings('a', p2) from 
test_count_substrings order by k1;"
+}
+
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_url_functions.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_url_functions.groovy
new file mode 100644
index 00000000000..389020b63e2
--- /dev/null
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_url_functions.groovy
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_url_functions") {
+    sql " drop table if exists test_url_functions"
+    sql """
+        create table test_url_functions (
+            id int,
+            s1 string not null,
+            s2 string null
+        )
+        DISTRIBUTED BY HASH(id)
+        PROPERTIES
+        (
+            "replication_num" = "1"
+        );
+    """
+
+    //empty table
+    order_qt_empty_nullable1 "select top_level_domain(s2) from 
test_url_functions"
+    order_qt_empty_nullable2 "select first_significant_subdomain(s2) from 
test_url_functions"
+    order_qt_empty_nullable3 "select cut_to_first_significant_subdomain(s2) 
from test_url_functions"
+    order_qt_empty_not_nullable1 "select top_level_domain(s1) from 
test_url_functions"
+    order_qt_empty_not_nullable2 "select first_significant_subdomain(s1) from 
test_url_functions"
+    order_qt_empty_not_nullable3 "select 
cut_to_first_significant_subdomain(s1) from test_url_functions"
+
+    //null / const
+    order_qt_empty_null1 "select top_level_domain(NULL)"
+    order_qt_empty_null2 "select first_significant_subdomain(NULL)"
+    order_qt_empty_null3 "select cut_to_first_significant_subdomain(NULL)"
+    
+    //valid url
+    order_qt_empty_const1 "select top_level_domain('www.baidu.com')"
+    order_qt_empty_const2 "select first_significant_subdomain('www.baidu.com')"
+    order_qt_empty_const3 "select 
cut_to_first_significant_subdomain('www.baidu.com')"
+    order_qt_empty_const4 "select top_level_domain('www.google.com.cn')"
+    order_qt_empty_const5 "select 
first_significant_subdomain('www.google.com.cn')"
+    order_qt_empty_const6 "select 
cut_to_first_significant_subdomain('www.google.com.cn')"
+    
+    //invalid url
+    order_qt_empty_const7 "select top_level_domain('I am invaild url')"
+    order_qt_empty_const8 "select first_significant_subdomain('I am invaild 
url')"
+    order_qt_empty_const9 "select cut_to_first_significant_subdomain('I am 
invaild url')"
+    
+
+    sql """ insert into test_url_functions values (1, 'www.baidu.com', 
'www.baidu.com'); """
+    sql """ insert into test_url_functions values (2, 'www.google.com.cn', 
'www.google.com.cn'); """
+    sql """ insert into test_url_functions values (3, 'invalid url', 'invalid 
url'); """
+    sql """ insert into test_url_functions values (4, '', ''); """
+    sql """ insert into test_url_functions values (5, ' ', ' '); """
+    sql """ insert into test_url_functions values (6, ' ', NULL); """
+    sql """ insert into test_url_functions values (7, 'xxxxxxxx', 'xxxxxxxx'); 
"""
+    sql """ insert into test_url_functions values (8, 
'http://www.example.com/a/b/c?a=b', 'http://www.example.com/a/b/c?a=b'); """
+    sql """ insert into test_url_functions values (9, 
'https://news.clickhouse.com/', 'https://news.clickhouse.com/'); """
+    sql """ insert into test_url_functions values (10, 
'https://news.clickhouse.com.tr/', 'https://news.clickhouse.com.tr/'); """
+
+    order_qt_nullable1 "select id,s2,top_level_domain(s2) from 
test_url_functions order by id"
+    order_qt_nullable2 "select id,s2,first_significant_subdomain(s2) from 
test_url_functions order by id"
+    order_qt_nullable3 "select id,s2,cut_to_first_significant_subdomain(s2) 
from test_url_functions order by id"
+
+    order_qt_not_nullable1 "select id,s1,top_level_domain(s1) from 
test_url_functions order by id"
+    order_qt_not_nullable2 "select id,s1,first_significant_subdomain(s1) from 
test_url_functions order by id"
+    order_qt_not_nullable3 "select 
id,s1,cut_to_first_significant_subdomain(s1) from test_url_functions order by 
id"
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to