This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 9fa0889d1c5 [fix](json) support last as index in array path (#55180)
9fa0889d1c5 is described below
commit 9fa0889d1c518d079a03b3fd78ff7d791a0911cd
Author: Jerry Hu <[email protected]>
AuthorDate: Wed Aug 27 18:29:16 2025 +0800
[fix](json) support last as index in array path (#55180)
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
be/src/exprs/json_functions.h | 27 +++++++++-----
be/src/vec/functions/function_json.cpp | 39 +++++++++++++++++----
.../json_functions/test_json_function.out | Bin 1341 -> 1452 bytes
.../json_functions/test_json_function.groovy | 5 +++
4 files changed, 57 insertions(+), 14 deletions(-)
diff --git a/be/src/exprs/json_functions.h b/be/src/exprs/json_functions.h
index 11970eb8c46..f93e1e145b2 100644
--- a/be/src/exprs/json_functions.h
+++ b/be/src/exprs/json_functions.h
@@ -48,15 +48,19 @@ enum JsonFunctionType {
};
struct JsonPath {
- std::string key; // key of a json object
- int idx; // array index of a json array, -1 means not set, -2
means *
- bool is_valid; // true if the path is successfully parsed
+ std::string key; // key of a json object
+ int idx; // array index of a json array, -1 means
not set, -2 means *
+ bool is_valid; // true if the path is successfully parsed
+ bool is_reverse_index = false; // true if the path is last index, like
'$.a[LAST(1)]'
- JsonPath(const std::string& key_, int idx_, bool is_valid_)
- : key(key_), idx(idx_), is_valid(is_valid_) {}
+ JsonPath(std::string key_, int idx_, bool is_valid_, bool
is_reverse_index_)
+ : key(std::move(key_)),
+ idx(idx_),
+ is_valid(is_valid_),
+ is_reverse_index(is_reverse_index_) {}
- JsonPath(std::string&& key_, int idx_, bool is_valid_)
- : key(std::move(key_)), idx(idx_), is_valid(is_valid_) {}
+ JsonPath(std::string key_, int idx_, bool is_valid_)
+ : JsonPath(std::move(key_), idx_, is_valid_, false) {}
std::string to_string() const {
std::stringstream ss;
@@ -68,6 +72,12 @@ struct JsonPath {
}
if (idx == -2) {
ss << "[*]";
+ } else if (is_reverse_index) {
+ if (idx > 0) {
+ ss << "[last-" << idx << "]";
+ } else {
+ ss << "[last]";
+ }
} else if (idx > -1) {
ss << "[" << idx << "]";
}
@@ -75,7 +85,8 @@ struct JsonPath {
}
std::string debug_string() const {
- return fmt::format("key:{}, idx:{}, valid:{}", key, idx, is_valid);
+ return fmt::format("key:{}, idx:{}, valid:{}, is_reverse_index:{}",
key, idx, is_valid,
+ is_reverse_index);
}
};
diff --git a/be/src/vec/functions/function_json.cpp
b/be/src/vec/functions/function_json.cpp
index 22a6871805a..8299ba14ee8 100644
--- a/be/src/vec/functions/function_json.cpp
+++ b/be/src/vec/functions/function_json.cpp
@@ -78,7 +78,8 @@ class FunctionContext;
} // namespace doris
namespace doris::vectorized {
-static const re2::RE2 JSON_PATTERN("^([^\\\"\\[\\]]*)(?:\\[([0-9]+|\\*)\\])?");
+static const re2::RE2 JSON_PATTERN(
+ R"(^([^\"\[\]]*)(?:\[([0-9]+|\*|last(?:\s*-\s*([0-9]+)\s*)?)\])?)");
template <typename T, typename U>
void char_split(std::vector<T>& res, const U& var, char p) {
@@ -112,18 +113,28 @@ void get_parsed_paths(const T& path_exprs,
std::vector<JsonPath>* parsed_paths)
for (int i = 1; i < path_exprs.size(); i++) {
std::string col;
std::string index;
- if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col,
&index))) {
+ std::string last_index_offset;
+ if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col, &index,
+ &last_index_offset))) {
parsed_paths->emplace_back("", -1, false);
} else {
int idx = -1;
+ bool is_reverse_index = false;
if (!index.empty()) {
if (index == "*") {
idx = -2;
+ } else if (index.starts_with("last")) {
+ is_reverse_index = true;
+ if (!last_index_offset.empty()) {
+ idx = atoi(last_index_offset.c_str());
+ } else {
+ idx = 0;
+ }
} else {
idx = atoi(index.c_str());
}
}
- parsed_paths->emplace_back(col, idx, true);
+ parsed_paths->emplace_back(col, idx, true, is_reverse_index);
}
}
}
@@ -177,6 +188,8 @@ rapidjson::Value* match_value(const std::vector<JsonPath>&
parsed_paths, rapidjs
root = array_obj;
} else if (index >= root->Size()) {
return nullptr;
+ } else if (parsed_paths[i].is_reverse_index) {
+ root = &((*root)[root->Size() - index - 1]);
} else {
root = &((*root)[index]);
}
@@ -1259,21 +1272,31 @@ private:
for (int i = 1; i < path_exprs.size(); i++) {
std::string col;
std::string index;
- if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col,
&index))) {
+ std::string last_index_offset;
+ if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col,
&index,
+ &last_index_offset))) {
return Status::RuntimeError(
"Invalid JSON path expression. The error is around
character position {}",
i + 1);
} else {
int idx = -1;
+ bool is_reverse_index = false;
if (!index.empty()) {
if (index == "*") {
return Status::RuntimeError(
"In this situation, path expressions may not
contain the * token");
+ } else if (index.starts_with("last")) {
+ is_reverse_index = true;
+ if (!last_index_offset.empty()) {
+ idx = atoi(last_index_offset.c_str());
+ } else {
+ idx = 0;
+ }
} else {
idx = atoi(index.c_str());
}
}
- parsed_paths->emplace_back(col, idx, true);
+ parsed_paths->emplace_back(col, idx, true, is_reverse_index);
}
}
return Status::OK();
@@ -1467,11 +1490,15 @@ public:
if (root->IsArray()) {
if (index >= root->Size()) {
// array append new value
- if (is_insert && i + 1 == parsed_paths.size()) {
+ if (is_insert && i + 1 == parsed_paths.size() &&
+ !parsed_paths[i].is_reverse_index) {
root->PushBack(*value, mem_allocator);
}
return;
} else {
+ if (parsed_paths[i].is_reverse_index) {
+ index = root->Size() - index - 1;
+ }
root = &((*root)[index]);
}
} else {
diff --git
a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
index fddcd249f4c..3e35a8ab2c9 100644
Binary files
a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
and
b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
differ
diff --git
a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
index be4076020f0..e31d367139e 100644
---
a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
+++
b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
@@ -61,6 +61,11 @@ suite("test_json_function", "arrow_flight_sql") {
qt_sql "SELECT
json_unquote('\"\\\\u0044\\\\u004F\\\\u0052\\\\u0049\\\\u0053\"');"
qt_sql "SELECT json_extract('[1, 2, 3]', '\$.[1]');"
+ qt_array_last1 "SELECT json_extract('[1, 2, 3]', '\$[last]');"
+ qt_array_last2 "SELECT json_extract('[1, 2, 3]', '\$[last -1]');"
+ qt_array_last3 "SELECT json_extract('[1, 2, 3]', '\$[last- 2]');"
+ qt_array_last4 "SELECT json_extract('[1, 2, 3]', '\$[last - 2 ]');"
+ qt_array_last5 "SELECT json_extract('[1, 2, 3]', '\$[last - 3]');"
qt_sql "SELECT json_extract('{\"id\": 123, \"name\": \"doris\"}', '\$.id',
'\$.name');"
qt_sql "SELECT json_extract('{\"id\": 123, \"name\": \"doris\"}', null,
'\$.id');"
qt_sql "SELECT json_extract(null, '\$.id');"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]