This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new 8df05d6d81 [Enhancement](like) fallback to re2 if hyperscan failed
pick #18350 (#18468)
8df05d6d81 is described below
commit 8df05d6d81036b08673676e7757eab5d45fab37f
Author: Kang <[email protected]>
AuthorDate: Sun Apr 9 09:19:56 2023 +0800
[Enhancement](like) fallback to re2 if hyperscan failed pick #18350 (#18468)
cherry-pick #18350
---
be/src/vec/functions/like.cpp | 218 ++++++++++++++++++++++++++++++------------
1 file changed, 157 insertions(+), 61 deletions(-)
diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp
index 629e8fe1fc..9f4b10bae2 100644
--- a/be/src/vec/functions/like.cpp
+++ b/be/src/vec/functions/like.cpp
@@ -48,17 +48,26 @@ Status LikeSearchState::clone(LikeSearchState& cloned) {
cloned.escape_char = escape_char;
cloned.set_search_string(search_string);
- if (hs_database) {
- std::string re_pattern;
- FunctionLike::convert_like_pattern(this, pattern_str, &re_pattern);
-
+ std::string re_pattern;
+ FunctionLike::convert_like_pattern(this, pattern_str, &re_pattern);
+ if (hs_database) { // use hyperscan
hs_database_t* database = nullptr;
hs_scratch_t* scratch = nullptr;
RETURN_IF_ERROR(FunctionLike::hs_prepare(nullptr, re_pattern.c_str(),
&database, &scratch));
-
cloned.hs_database.reset(database);
cloned.hs_scratch.reset(scratch);
- }
+ } else { // fallback to re2
+ cloned.hs_database.reset();
+ cloned.hs_scratch.reset();
+
+ RE2::Options opts;
+ opts.set_never_nl(false);
+ opts.set_dot_nl(true);
+ cloned.regex = std::make_unique<RE2>(re_pattern, opts);
+ if (!cloned.regex->ok()) {
+ return Status::InternalError("Invalid regex expression: {}",
re_pattern);
+ }
+ }
return Status::OK();
}
@@ -200,10 +209,14 @@ Status
FunctionLikeBase::constant_substring_fn_scalar(LikeSearchState* state, co
Status FunctionLikeBase::constant_regex_fn_scalar(LikeSearchState* state,
const StringRef& val,
const StringValue& pattern,
unsigned char* result) {
- auto ret = hs_scan(state->hs_database.get(), val.data, val.size, 0,
state->hs_scratch.get(),
- state->hs_match_handler, (void*)result);
- if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
- return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
+ if (state->hs_database) { // use hyperscan
+ auto ret = hs_scan(state->hs_database.get(), val.data, val.size, 0,
state->hs_scratch.get(),
+ state->hs_match_handler, (void*)result);
+ if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
+ return Status::RuntimeError(fmt::format("hyperscan error: {}",
ret));
+ }
+ } else { // fallback to re2
+ *result = RE2::PartialMatch(re2::StringPiece(val.data, val.size),
*state->regex.get());
}
return Status::OK();
@@ -215,16 +228,26 @@ Status
FunctionLikeBase::regexp_fn_scalar(LikeSearchState* state, const StringRe
hs_database_t* database = nullptr;
hs_scratch_t* scratch = nullptr;
- RETURN_IF_ERROR(hs_prepare(nullptr, re_pattern.c_str(), &database,
&scratch));
-
- auto ret = hs_scan(database, val.data, val.size, 0, scratch,
state->hs_match_handler,
- (void*)result);
- if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
- return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
- }
+ if (hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch).ok()) {
// use hyperscan
+ auto ret = hs_scan(database, val.data, val.size, 0, scratch,
state->hs_match_handler,
+ (void*)result);
+ if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
+ return Status::RuntimeError(fmt::format("hyperscan error: {}",
ret));
+ }
- hs_free_scratch(scratch);
- hs_free_database(database);
+ hs_free_scratch(scratch);
+ hs_free_database(database);
+ } else { // fallback to re2
+ RE2::Options opts;
+ opts.set_never_nl(false);
+ opts.set_dot_nl(true);
+ re2::RE2 re(re_pattern, opts);
+ if (re.ok()) {
+ *result = RE2::PartialMatch(re2::StringPiece(val.data, val.size),
re);
+ } else {
+ return Status::RuntimeError("Invalid pattern: {}",
pattern.debug_string());
+ }
+ }
return Status::OK();
}
@@ -233,13 +256,21 @@ Status
FunctionLikeBase::constant_regex_fn(LikeSearchState* state, const ColumnS
const StringValue& pattern,
ColumnUInt8::Container& result) {
auto sz = val.size();
- for (size_t i = 0; i < sz; i++) {
- const auto& str_ref = val.get_data_at(i);
- auto ret = hs_scan(state->hs_database.get(), str_ref.data,
str_ref.size, 0,
- state->hs_scratch.get(), state->hs_match_handler,
- (void*)(result.data() + i));
- if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
- return Status::RuntimeError(fmt::format("hyperscan error: {}",
ret));
+ if (state->hs_database) { // use hyperscan
+ for (size_t i = 0; i < sz; i++) {
+ const auto& str_ref = val.get_data_at(i);
+ auto ret = hs_scan(state->hs_database.get(), str_ref.data,
str_ref.size, 0,
+ state->hs_scratch.get(), state->hs_match_handler,
+ (void*)(result.data() + i));
+ if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
+ return Status::RuntimeError(fmt::format("hyperscan error: {}",
ret));
+ }
+ }
+ } else { // fallback to re2
+ for (size_t i = 0; i < sz; i++) {
+ const auto& str_ref = val.get_data_at(i);
+ *(result.data() + i) =
RE2::PartialMatch(re2::StringPiece(str_ref.data, str_ref.size),
+ *state->regex.get());
}
}
@@ -252,21 +283,36 @@ Status FunctionLikeBase::regexp_fn(LikeSearchState*
state, const ColumnString& v
hs_database_t* database = nullptr;
hs_scratch_t* scratch = nullptr;
- RETURN_IF_ERROR(hs_prepare(nullptr, re_pattern.c_str(), &database,
&scratch));
+ if (hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch).ok()) {
// use hyperscan
+ auto sz = val.size();
+ for (size_t i = 0; i < sz; i++) {
+ const auto& str_ref = val.get_data_at(i);
+ auto ret = hs_scan(database, str_ref.data, str_ref.size, 0,
scratch,
+ state->hs_match_handler, (void*)(result.data() +
i));
+ if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
+ return Status::RuntimeError(fmt::format("hyperscan error: {}",
ret));
+ }
+ }
- auto sz = val.size();
- for (size_t i = 0; i < sz; i++) {
- const auto& str_ref = val.get_data_at(i);
- auto ret = hs_scan(database, str_ref.data, str_ref.size, 0, scratch,
- state->hs_match_handler, (void*)(result.data() +
i));
- if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
- return Status::RuntimeError(fmt::format("hyperscan error: {}",
ret));
+ hs_free_scratch(scratch);
+ hs_free_database(database);
+ } else { // fallback to re2
+ RE2::Options opts;
+ opts.set_never_nl(false);
+ opts.set_dot_nl(true);
+ re2::RE2 re(re_pattern, opts);
+ if (re.ok()) {
+ auto sz = val.size();
+ for (size_t i = 0; i < sz; i++) {
+ const auto& str_ref = val.get_data_at(i);
+ *(result.data() + i) =
+ RE2::PartialMatch(re2::StringPiece(str_ref.data,
str_ref.size), re);
+ }
+ } else {
+ return Status::RuntimeError("Invalid pattern: {}",
pattern.debug_string());
}
}
- hs_free_scratch(scratch);
- hs_free_database(database);
-
return Status::OK();
}
@@ -276,12 +322,20 @@ Status
FunctionLikeBase::constant_regex_fn_predicate(LikeSearchState* state,
ColumnUInt8::Container&
result, uint16_t* sel,
size_t sz) {
auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
- for (size_t i = 0; i < sz; i++) {
- auto ret = hs_scan(state->hs_database.get(), data_ptr[sel[i]].data,
data_ptr[sel[i]].size,
- 0, state->hs_scratch.get(), state->hs_match_handler,
- (void*)(result.data() + i));
- if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
- return Status::RuntimeError(fmt::format("hyperscan error: {}",
ret));
+ if (state->hs_database) { // use hyperscan
+ for (size_t i = 0; i < sz; i++) {
+ auto ret = hs_scan(state->hs_database.get(),
data_ptr[sel[i]].data, data_ptr[sel[i]].size,
+ 0, state->hs_scratch.get(),
state->hs_match_handler,
+ (void*)(result.data() + i));
+ if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
+ return Status::RuntimeError(fmt::format("hyperscan error: {}",
ret));
+ }
+ }
+ } else { // fallback to re2
+ for (size_t i = 0; i < sz; i++) {
+ *(result.data() + i) = RE2::PartialMatch(
+ re2::StringPiece(data_ptr[sel[i]].data,
data_ptr[sel[i]].size),
+ *state->regex.get());
}
}
@@ -297,26 +351,41 @@ Status
FunctionLikeBase::regexp_fn_predicate(LikeSearchState* state,
hs_database_t* database = nullptr;
hs_scratch_t* scratch = nullptr;
- RETURN_IF_ERROR(hs_prepare(nullptr, re_pattern.c_str(), &database,
&scratch));
+ if (hs_prepare(nullptr, re_pattern.c_str(), &database, &scratch).ok()) {
// use hyperscan
+ auto data_ptr = reinterpret_cast<const
StringRef*>(val.get_data().data());
+ for (size_t i = 0; i < sz; i++) {
+ auto ret = hs_scan(database, data_ptr[sel[i]].data,
data_ptr[sel[i]].size, 0, scratch,
+ state->hs_match_handler, (void*)(result.data() +
i));
+ if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
+ return Status::RuntimeError(fmt::format("hyperscan error: {}",
ret));
+ }
+ }
- auto data_ptr = reinterpret_cast<const StringRef*>(val.get_data().data());
- for (size_t i = 0; i < sz; i++) {
- auto ret = hs_scan(database, data_ptr[sel[i]].data,
data_ptr[sel[i]].size, 0, scratch,
- state->hs_match_handler, (void*)(result.data() +
i));
- if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
- return Status::RuntimeError(fmt::format("hyperscan error: {}",
ret));
+ hs_free_scratch(scratch);
+ hs_free_database(database);
+ } else { // fallback to re2
+ RE2::Options opts;
+ opts.set_never_nl(false);
+ opts.set_dot_nl(true);
+ re2::RE2 re(re_pattern, opts);
+ if (re.ok()) {
+ auto data_ptr = reinterpret_cast<const
StringRef*>(val.get_data().data());
+ for (size_t i = 0; i < sz; i++) {
+ *(result.data() + i) = RE2::PartialMatch(
+ re2::StringPiece(data_ptr[sel[i]].data,
data_ptr[sel[i]].size), re);
+ }
+ } else {
+ return Status::RuntimeError("Invalid pattern: {}",
pattern.debug_string());
}
}
- hs_free_scratch(scratch);
- hs_free_database(database);
-
return Status::OK();
}
// hyperscan compile expression to database and allocate scratch space
Status FunctionLikeBase::hs_prepare(FunctionContext* context, const char*
expression,
hs_database_t** database, hs_scratch_t**
scratch) {
+ return Status::RuntimeError("hs_compile inject error:");
hs_compile_error_t* compile_err;
auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY,
HS_MODE_BLOCK, NULL,
database, &compile_err);
@@ -651,10 +720,24 @@ Status FunctionLike::prepare(FunctionContext* context,
FunctionContext::Function
hs_database_t* database = nullptr;
hs_scratch_t* scratch = nullptr;
- RETURN_IF_ERROR(hs_prepare(context, re_pattern.c_str(), &database,
&scratch));
-
- state->search_state.hs_database.reset(database);
- state->search_state.hs_scratch.reset(scratch);
+ if (hs_prepare(context, re_pattern.c_str(), &database,
&scratch).ok()) {
+ // use hyperscan
+ state->search_state.hs_database.reset(database);
+ state->search_state.hs_scratch.reset(scratch);
+ } else {
+ // fallback to re2
+ // reset hs_database to nullptr to indicate not use hyperscan
+ state->search_state.hs_database.reset();
+ state->search_state.hs_scratch.reset();
+
+ RE2::Options opts;
+ opts.set_never_nl(false);
+ opts.set_dot_nl(true);
+ state->search_state.regex = std::make_unique<RE2>(re_pattern,
opts);
+ if (!state->search_state.regex->ok()) {
+ return Status::InternalError("Invalid regex expression:
{}", pattern_str);
+ }
+ }
state->function = constant_regex_fn;
state->predicate_like_function = constant_regex_fn_predicate;
@@ -703,10 +786,23 @@ Status FunctionRegexp::prepare(FunctionContext* context,
} else {
hs_database_t* database = nullptr;
hs_scratch_t* scratch = nullptr;
- RETURN_IF_ERROR(hs_prepare(context, pattern_str.c_str(),
&database, &scratch));
-
- state->search_state.hs_database.reset(database);
- state->search_state.hs_scratch.reset(scratch);
+ if (hs_prepare(context, pattern_str.c_str(), &database,
&scratch).ok()) {
+ // use hyperscan
+ state->search_state.hs_database.reset(database);
+ state->search_state.hs_scratch.reset(scratch);
+ } else {
+ // fallback to re2
+ // reset hs_database to nullptr to indicate not use hyperscan
+ state->search_state.hs_database.reset();
+ state->search_state.hs_scratch.reset();
+ RE2::Options opts;
+ opts.set_never_nl(false);
+ opts.set_dot_nl(true);
+ state->search_state.regex = std::make_unique<RE2>(pattern_str,
opts);
+ if (!state->search_state.regex->ok()) {
+ return Status::InternalError("Invalid regex expression:
{}", pattern_str);
+ }
+ }
state->function = constant_regex_fn;
state->predicate_like_function = constant_regex_fn_predicate;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]