This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b62c5a70c7 [fix](match query) fix array column match query failed without inverted index (#20344)
b62c5a70c7 is described below

commit b62c5a70c7400dc3e0e4e5776e9ab8854b7c333b
Author: YueW <45946325+tany...@users.noreply.github.com>
AuthorDate: Fri Jun 2 21:10:12 2023 +0800

    [fix](match query) fix array column match query failed without inverted index (#20344)
---
 be/src/vec/functions/match.cpp                     | 111 ++++++++++++++++-----
 be/src/vec/functions/match.h                       |  42 ++++++--
 .../data/inverted_index_p0/test_array_index.out    |  12 +++
 .../inverted_index_p0/test_array_index.groovy      |   6 ++
 4 files changed, 138 insertions(+), 33 deletions(-)

diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp
index fe55bf9a5a..77de502107 100644
--- a/be/src/vec/functions/match.cpp
+++ b/be/src/vec/functions/match.cpp
@@ -17,7 +17,6 @@
 
 #include "vec/functions/match.h"
 
-#include "olap/rowset/segment_v2/inverted_index_reader.h"
 #include "runtime/query_context.h"
 #include "runtime/runtime_state.h"
 
@@ -36,19 +35,45 @@ Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block,
         InvertedIndexCtx* inverted_index_ctx = 
reinterpret_cast<InvertedIndexCtx*>(
                 context->get_function_state(FunctionContext::THREAD_LOCAL));
 
-        const auto values_col =
+        const ColumnPtr source_col =
                 
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
-        const auto* values = 
check_and_get_column<ColumnString>(values_col.get());
+        const auto* values = 
check_and_get_column<ColumnString>(source_col.get());
+        const ColumnArray* array_col = nullptr;
+        if (source_col->is_column_array()) {
+            array_col = check_and_get_column<ColumnArray>(source_col.get());
+            if (array_col && !array_col->get_data().is_column_string()) {
+                return Status::NotSupported(
+                        fmt::format("unsupported nested array of type {} for 
function {}",
+                                    is_column_nullable(array_col->get_data())
+                                            ? array_col->get_data().get_name()
+                                            : 
array_col->get_data().get_family_name(),
+                                    get_name()));
+            }
+
+            if (is_column_nullable(array_col->get_data())) {
+                const auto& array_nested_null_column =
+                        reinterpret_cast<const 
ColumnNullable&>(array_col->get_data());
+                values = check_and_get_column<ColumnString>(
+                        *(array_nested_null_column.get_nested_column_ptr()));
+            } else {
+                values = 
check_and_get_column<ColumnString>(*(array_col->get_data_ptr()));
+            }
+        } else if (auto* nullable = 
check_and_get_column<ColumnNullable>(source_col.get())) {
+            values = 
check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
+        }
+
         if (!values) {
-            return Status::InternalError("Not supported input arguments 
types");
+            LOG(WARNING) << "Illegal column " << source_col->get_name();
+            return Status::InternalError("Not supported input column types");
         }
         // result column
         auto res = ColumnUInt8::create();
         ColumnUInt8::Container& vec_res = res->get_data();
         // set default value to 0, and match functions only need to set 1/true
         vec_res.resize_fill(input_rows_count);
-        RETURN_IF_ERROR(execute_match(column_name, match_query_str, 
input_rows_count, values,
-                                      inverted_index_ctx, vec_res));
+        RETURN_IF_ERROR(execute_match(
+                column_name, match_query_str, input_rows_count, values, 
inverted_index_ctx,
+                (array_col ? &(array_col->get_offsets()) : nullptr), vec_res));
         block.replace_by_position(result, std::move(res));
     } else {
         auto match_pred_column =
@@ -59,10 +84,46 @@ Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block,
     return Status::OK();
 }
 
+inline doris::segment_v2::InvertedIndexQueryType 
FunctionMatchBase::get_query_type_from_fn_name() {
+    std::string fn_name = get_name();
+    if (fn_name == MATCH_ANY_FUNCTION) {
+        return doris::segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY;
+    } else if (fn_name == MATCH_ALL_FUNCTION) {
+        return doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY;
+    } else if (fn_name == MATCH_PHRASE_FUNCTION) {
+        return doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY;
+    }
+    return doris::segment_v2::InvertedIndexQueryType::UNKNOWN_QUERY;
+}
+
+inline std::vector<std::wstring> FunctionMatchBase::analyse_data_token(
+        const std::string& column_name, InvertedIndexCtx* inverted_index_ctx,
+        const ColumnString* string_col, int32_t current_block_row_idx,
+        const ColumnArray::Offsets64* array_offsets, int32_t& 
current_src_array_offset) {
+    std::vector<std::wstring> data_tokens;
+    auto query_type = get_query_type_from_fn_name();
+    if (array_offsets) {
+        for (auto next_src_array_offset = 
(*array_offsets)[current_block_row_idx];
+             current_src_array_offset < next_src_array_offset; 
++current_src_array_offset) {
+            const auto& str_ref = 
string_col->get_data_at(current_src_array_offset);
+            std::vector<std::wstring> element_tokens =
+                    doris::segment_v2::InvertedIndexReader::get_analyse_result(
+                            column_name, str_ref.to_string(), query_type, 
inverted_index_ctx);
+            data_tokens.insert(data_tokens.end(), element_tokens.begin(), 
element_tokens.end());
+        }
+    } else {
+        const auto& str_ref = string_col->get_data_at(current_block_row_idx);
+        data_tokens = 
doris::segment_v2::InvertedIndexReader::get_analyse_result(
+                column_name, str_ref.to_string(), query_type, 
inverted_index_ctx);
+    }
+    return data_tokens;
+}
+
 Status FunctionMatchAny::execute_match(const std::string& column_name,
                                        const std::string& match_query_str, 
size_t input_rows_count,
-                                       const ColumnString* datas,
+                                       const ColumnString* string_col,
                                        InvertedIndexCtx* inverted_index_ctx,
+                                       const ColumnArray::Offsets64* 
array_offsets,
                                        ColumnUInt8::Container& result) {
     doris::InvertedIndexParserType parser_type = 
doris::InvertedIndexParserType::PARSER_UNKNOWN;
     if (inverted_index_ctx) {
@@ -79,13 +140,13 @@ Status FunctionMatchAny::execute_match(const std::string& column_name,
                      << ", please check your query sql";
         return Status::Error<ErrorCode::INVERTED_INDEX_NO_TERMS>();
     }
+
+    auto current_src_array_offset = 0;
     for (int i = 0; i < input_rows_count; i++) {
-        const auto& str_ref = datas->get_data_at(i);
         std::vector<std::wstring> data_tokens =
-                doris::segment_v2::InvertedIndexReader::get_analyse_result(
-                        column_name, str_ref.to_string(),
-                        
doris::segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY,
-                        inverted_index_ctx);
+                analyse_data_token(column_name, inverted_index_ctx, 
string_col, i, array_offsets,
+                                   current_src_array_offset);
+
         // TODO: more efficient impl
         for (auto& token : query_tokens) {
             auto it = std::find(data_tokens.begin(), data_tokens.end(), token);
@@ -101,8 +162,9 @@ Status FunctionMatchAny::execute_match(const std::string& column_name,
 
 Status FunctionMatchAll::execute_match(const std::string& column_name,
                                        const std::string& match_query_str, 
size_t input_rows_count,
-                                       const ColumnString* datas,
+                                       const ColumnString* string_col,
                                        InvertedIndexCtx* inverted_index_ctx,
+                                       const ColumnArray::Offsets64* 
array_offsets,
                                        ColumnUInt8::Container& result) {
     doris::InvertedIndexParserType parser_type = 
doris::InvertedIndexParserType::PARSER_UNKNOWN;
     if (inverted_index_ctx) {
@@ -119,13 +181,13 @@ Status FunctionMatchAll::execute_match(const std::string& column_name,
                      << ", please check your query sql";
         return Status::Error<ErrorCode::INVERTED_INDEX_NO_TERMS>();
     }
+
+    auto current_src_array_offset = 0;
     for (int i = 0; i < input_rows_count; i++) {
-        const auto& str_ref = datas->get_data_at(i);
         std::vector<std::wstring> data_tokens =
-                doris::segment_v2::InvertedIndexReader::get_analyse_result(
-                        column_name, str_ref.to_string(),
-                        
doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY,
-                        inverted_index_ctx);
+                analyse_data_token(column_name, inverted_index_ctx, 
string_col, i, array_offsets,
+                                   current_src_array_offset);
+
         // TODO: more efficient impl
         auto find_count = 0;
         for (auto& token : query_tokens) {
@@ -147,8 +209,9 @@ Status FunctionMatchAll::execute_match(const std::string& column_name,
 
 Status FunctionMatchPhrase::execute_match(const std::string& column_name,
                                           const std::string& match_query_str,
-                                          size_t input_rows_count, const 
ColumnString* datas,
+                                          size_t input_rows_count, const 
ColumnString* string_col,
                                           InvertedIndexCtx* inverted_index_ctx,
+                                          const ColumnArray::Offsets64* 
array_offsets,
                                           ColumnUInt8::Container& result) {
     doris::InvertedIndexParserType parser_type = 
doris::InvertedIndexParserType::PARSER_UNKNOWN;
     if (inverted_index_ctx) {
@@ -166,13 +229,13 @@ Status FunctionMatchPhrase::execute_match(const std::string& column_name,
                      << ", please check your query sql";
         return Status::Error<ErrorCode::INVERTED_INDEX_NO_TERMS>();
     }
+
+    auto current_src_array_offset = 0;
     for (int i = 0; i < input_rows_count; i++) {
-        const auto& str_ref = datas->get_data_at(i);
         std::vector<std::wstring> data_tokens =
-                doris::segment_v2::InvertedIndexReader::get_analyse_result(
-                        column_name, str_ref.to_string(),
-                        
doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY,
-                        inverted_index_ctx);
+                analyse_data_token(column_name, inverted_index_ctx, 
string_col, i, array_offsets,
+                                   current_src_array_offset);
+
         // TODO: more efficient impl
         bool matched = false;
         auto it = data_tokens.begin();
diff --git a/be/src/vec/functions/match.h b/be/src/vec/functions/match.h
index 3fcce4ebf1..dda00bb56c 100644
--- a/be/src/vec/functions/match.h
+++ b/be/src/vec/functions/match.h
@@ -31,8 +31,10 @@
 #include "common/logging.h"
 #include "common/status.h"
 #include "olap/inverted_index_parser.h"
+#include "olap/rowset/segment_v2/inverted_index_reader.h"
 #include "vec/aggregate_functions/aggregate_function.h"
 #include "vec/columns/column.h"
+#include "vec/columns/column_array.h"
 #include "vec/core/block.h"
 #include "vec/core/column_numbers.h"
 #include "vec/core/column_with_type_and_name.h"
@@ -48,6 +50,10 @@ class FunctionContext;
 
 namespace doris::vectorized {
 
+const std::string MATCH_ANY_FUNCTION = "match_any";
+const std::string MATCH_ALL_FUNCTION = "match_all";
+const std::string MATCH_PHRASE_FUNCTION = "match_phrase";
+
 class FunctionMatchBase : public IFunction {
 public:
     size_t get_number_of_arguments() const override { return 2; }
@@ -63,9 +69,19 @@ public:
                         size_t result, size_t input_rows_count) override;
 
     virtual Status execute_match(const std::string& column_name, const 
std::string& match_query_str,
-                                 size_t input_rows_count, const ColumnString* 
datas,
+                                 size_t input_rows_count, const ColumnString* 
string_col,
                                  InvertedIndexCtx* inverted_index_ctx,
+                                 const ColumnArray::Offsets64* array_offsets,
                                  ColumnUInt8::Container& result) = 0;
+
+    doris::segment_v2::InvertedIndexQueryType get_query_type_from_fn_name();
+
+    std::vector<std::wstring> analyse_data_token(const std::string& 
column_name,
+                                                 InvertedIndexCtx* 
inverted_index_ctx,
+                                                 const ColumnString* 
string_col,
+                                                 int32_t current_block_row_idx,
+                                                 const ColumnArray::Offsets64* 
array_offsets,
+                                                 int32_t& 
current_src_array_offset);
 };
 
 class FunctionMatchAny : public FunctionMatchBase {
@@ -76,8 +92,9 @@ public:
     String get_name() const override { return name; }
 
     virtual Status execute_match(const std::string& column_name, const 
std::string& match_query_str,
-                                 size_t input_rows_count, const ColumnString* 
datas,
+                                 size_t input_rows_count, const ColumnString* 
string_col,
                                  InvertedIndexCtx* inverted_index_ctx,
+                                 const ColumnArray::Offsets64* array_offsets,
                                  ColumnUInt8::Container& result) override;
 };
 
@@ -89,8 +106,9 @@ public:
     String get_name() const override { return name; }
 
     virtual Status execute_match(const std::string& column_name, const 
std::string& match_query_str,
-                                 size_t input_rows_count, const ColumnString* 
datas,
+                                 size_t input_rows_count, const ColumnString* 
string_col,
                                  InvertedIndexCtx* inverted_index_ctx,
+                                 const ColumnArray::Offsets64* array_offsets,
                                  ColumnUInt8::Container& result) override;
 };
 
@@ -102,8 +120,9 @@ public:
     String get_name() const override { return name; }
 
     virtual Status execute_match(const std::string& column_name, const 
std::string& match_query_str,
-                                 size_t input_rows_count, const ColumnString* 
datas,
+                                 size_t input_rows_count, const ColumnString* 
string_col,
                                  InvertedIndexCtx* inverted_index_ctx,
+                                 const ColumnArray::Offsets64* array_offsets,
                                  ColumnUInt8::Container& result) override;
 };
 
@@ -115,8 +134,9 @@ public:
     String get_name() const override { return name; }
 
     virtual Status execute_match(const std::string& column_name, const 
std::string& match_query_str,
-                                 size_t input_rows_count, const ColumnString* 
datas,
+                                 size_t input_rows_count, const ColumnString* 
string_col,
                                  InvertedIndexCtx* inverted_index_ctx,
+                                 const ColumnArray::Offsets64* array_offsets,
                                  ColumnUInt8::Container& result) override {
         return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
     }
@@ -130,8 +150,9 @@ public:
     String get_name() const override { return name; }
 
     virtual Status execute_match(const std::string& column_name, const 
std::string& match_query_str,
-                                 size_t input_rows_count, const ColumnString* 
datas,
+                                 size_t input_rows_count, const ColumnString* 
string_col,
                                  InvertedIndexCtx* inverted_index_ctx,
+                                 const ColumnArray::Offsets64* array_offsets,
                                  ColumnUInt8::Container& result) override {
         return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
     }
@@ -145,8 +166,9 @@ public:
     String get_name() const override { return name; }
 
     virtual Status execute_match(const std::string& column_name, const 
std::string& match_query_str,
-                                 size_t input_rows_count, const ColumnString* 
datas,
+                                 size_t input_rows_count, const ColumnString* 
string_col,
                                  InvertedIndexCtx* inverted_index_ctx,
+                                 const ColumnArray::Offsets64* array_offsets,
                                  ColumnUInt8::Container& result) override {
         return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
     }
@@ -160,8 +182,9 @@ public:
     String get_name() const override { return name; }
 
     virtual Status execute_match(const std::string& column_name, const 
std::string& match_query_str,
-                                 size_t input_rows_count, const ColumnString* 
datas,
+                                 size_t input_rows_count, const ColumnString* 
string_col,
                                  InvertedIndexCtx* inverted_index_ctx,
+                                 const ColumnArray::Offsets64* array_offsets,
                                  ColumnUInt8::Container& result) override {
         return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
     }
@@ -175,8 +198,9 @@ public:
     String get_name() const override { return name; }
 
     virtual Status execute_match(const std::string& column_name, const 
std::string& match_query_str,
-                                 size_t input_rows_count, const ColumnString* 
datas,
+                                 size_t input_rows_count, const ColumnString* 
string_col,
                                  InvertedIndexCtx* inverted_index_ctx,
+                                 const ColumnArray::Offsets64* array_offsets,
                                  ColumnUInt8::Container& result) override {
         return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
     }
diff --git a/regression-test/data/inverted_index_p0/test_array_index.out b/regression-test/data/inverted_index_p0/test_array_index.out
index 4fd98fbd3d..0d7529c8b6 100644
--- a/regression-test/data/inverted_index_p0/test_array_index.out
+++ b/regression-test/data/inverted_index_p0/test_array_index.out
@@ -56,3 +56,15 @@
 -- !sql --
 4      [40, 50, 60]    \N
 
+-- !sql --
+1      [10, 20, 30]    ["i", "love", "china"]
+
+-- !sql --
+1      [10, 20, 30]    ["i", "love", "china"]
+2      [20, 30, 40]    ["i", "love", "north korea"]
+
+-- !sql --
+2      [20, 30, 40]    ["i", "love", "north korea"]
+
+-- !sql --
+2      [20, 30, 40]    ["i", "love", "north korea"]
diff --git a/regression-test/suites/inverted_index_p0/test_array_index.groovy b/regression-test/suites/inverted_index_p0/test_array_index.groovy
index 1811a44b22..7fd95f5ad1 100644
--- a/regression-test/suites/inverted_index_p0/test_array_index.groovy
+++ b/regression-test/suites/inverted_index_p0/test_array_index.groovy
@@ -64,4 +64,10 @@ suite("test_array_index"){
     qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 40 ORDER BY 
id;"
     qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 50 ORDER BY 
id;"
     qt_sql "SELECT * FROM $indexTblName WHERE int_array element_eq 60 ORDER BY 
id;"
+
+    sql " ALTER TABLE $indexTblName drop index c_array_idx; "
+    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'china' ORDER BY 
id;"
+    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'love' ORDER BY 
id;"
+    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'north' ORDER BY 
id;"
+    qt_sql "SELECT * FROM $indexTblName WHERE c_array MATCH 'korea' ORDER BY 
id;"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to