This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 9fa0889d1c5 [fix](json) support last as index in array path (#55180)
9fa0889d1c5 is described below

commit 9fa0889d1c518d079a03b3fd78ff7d791a0911cd
Author: Jerry Hu <[email protected]>
AuthorDate: Wed Aug 27 18:29:16 2025 +0800

    [fix](json) support last as index in array path (#55180)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/exprs/json_functions.h                      |  27 +++++++++-----
 be/src/vec/functions/function_json.cpp             |  39 +++++++++++++++++----
 .../json_functions/test_json_function.out          | Bin 1341 -> 1452 bytes
 .../json_functions/test_json_function.groovy       |   5 +++
 4 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/be/src/exprs/json_functions.h b/be/src/exprs/json_functions.h
index 11970eb8c46..f93e1e145b2 100644
--- a/be/src/exprs/json_functions.h
+++ b/be/src/exprs/json_functions.h
@@ -48,15 +48,19 @@ enum JsonFunctionType {
 };
 
 struct JsonPath {
-    std::string key; // key of a json object
-    int idx;         // array index of a json array, -1 means not set, -2 
means *
-    bool is_valid;   // true if the path is successfully parsed
+    std::string key;               // key of a json object
+    int idx;                       // array index of a json array, -1 means 
not set, -2 means *
+    bool is_valid;                 // true if the path is successfully parsed
+    bool is_reverse_index = false; // true if the path is last index, like 
'$.a[LAST(1)]'
 
-    JsonPath(const std::string& key_, int idx_, bool is_valid_)
-            : key(key_), idx(idx_), is_valid(is_valid_) {}
+    JsonPath(std::string key_, int idx_, bool is_valid_, bool 
is_reverse_index_)
+            : key(std::move(key_)),
+              idx(idx_),
+              is_valid(is_valid_),
+              is_reverse_index(is_reverse_index_) {}
 
-    JsonPath(std::string&& key_, int idx_, bool is_valid_)
-            : key(std::move(key_)), idx(idx_), is_valid(is_valid_) {}
+    JsonPath(std::string key_, int idx_, bool is_valid_)
+            : JsonPath(std::move(key_), idx_, is_valid_, false) {}
 
     std::string to_string() const {
         std::stringstream ss;
@@ -68,6 +72,12 @@ struct JsonPath {
         }
         if (idx == -2) {
             ss << "[*]";
+        } else if (is_reverse_index) {
+            if (idx > 0) {
+                ss << "[last-" << idx << "]";
+            } else {
+                ss << "[last]";
+            }
         } else if (idx > -1) {
             ss << "[" << idx << "]";
         }
@@ -75,7 +85,8 @@ struct JsonPath {
     }
 
     std::string debug_string() const {
-        return fmt::format("key:{}, idx:{}, valid:{}", key, idx, is_valid);
+        return fmt::format("key:{}, idx:{}, valid:{}, is_reverse_index:{}", 
key, idx, is_valid,
+                           is_reverse_index);
     }
 };
 
diff --git a/be/src/vec/functions/function_json.cpp 
b/be/src/vec/functions/function_json.cpp
index 22a6871805a..8299ba14ee8 100644
--- a/be/src/vec/functions/function_json.cpp
+++ b/be/src/vec/functions/function_json.cpp
@@ -78,7 +78,8 @@ class FunctionContext;
 } // namespace doris
 
 namespace doris::vectorized {
-static const re2::RE2 JSON_PATTERN("^([^\\\"\\[\\]]*)(?:\\[([0-9]+|\\*)\\])?");
+static const re2::RE2 JSON_PATTERN(
+        R"(^([^\"\[\]]*)(?:\[([0-9]+|\*|last(?:\s*-\s*([0-9]+)\s*)?)\])?)");
 
 template <typename T, typename U>
 void char_split(std::vector<T>& res, const U& var, char p) {
@@ -112,18 +113,28 @@ void get_parsed_paths(const T& path_exprs, 
std::vector<JsonPath>* parsed_paths)
     for (int i = 1; i < path_exprs.size(); i++) {
         std::string col;
         std::string index;
-        if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col, 
&index))) {
+        std::string last_index_offset;
+        if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col, &index,
+                                     &last_index_offset))) {
             parsed_paths->emplace_back("", -1, false);
         } else {
             int idx = -1;
+            bool is_reverse_index = false;
             if (!index.empty()) {
                 if (index == "*") {
                     idx = -2;
+                } else if (index.starts_with("last")) {
+                    is_reverse_index = true;
+                    if (!last_index_offset.empty()) {
+                        idx = atoi(last_index_offset.c_str());
+                    } else {
+                        idx = 0;
+                    }
                 } else {
                     idx = atoi(index.c_str());
                 }
             }
-            parsed_paths->emplace_back(col, idx, true);
+            parsed_paths->emplace_back(col, idx, true, is_reverse_index);
         }
     }
 }
@@ -177,6 +188,8 @@ rapidjson::Value* match_value(const std::vector<JsonPath>& 
parsed_paths, rapidjs
                     root = array_obj;
                 } else if (index >= root->Size()) {
                     return nullptr;
+                } else if (parsed_paths[i].is_reverse_index) {
+                    root = &((*root)[root->Size() - index - 1]);
                 } else {
                     root = &((*root)[index]);
                 }
@@ -1259,21 +1272,31 @@ private:
         for (int i = 1; i < path_exprs.size(); i++) {
             std::string col;
             std::string index;
-            if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col, 
&index))) {
+            std::string last_index_offset;
+            if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col, 
&index,
+                                         &last_index_offset))) {
                 return Status::RuntimeError(
                         "Invalid JSON path expression. The error is around 
character position {}",
                         i + 1);
             } else {
                 int idx = -1;
+                bool is_reverse_index = false;
                 if (!index.empty()) {
                     if (index == "*") {
                         return Status::RuntimeError(
                                 "In this situation, path expressions may not 
contain the * token");
+                    } else if (index.starts_with("last")) {
+                        is_reverse_index = true;
+                        if (!last_index_offset.empty()) {
+                            idx = atoi(last_index_offset.c_str());
+                        } else {
+                            idx = 0;
+                        }
                     } else {
                         idx = atoi(index.c_str());
                     }
                 }
-                parsed_paths->emplace_back(col, idx, true);
+                parsed_paths->emplace_back(col, idx, true, is_reverse_index);
             }
         }
         return Status::OK();
@@ -1467,11 +1490,15 @@ public:
                 if (root->IsArray()) {
                     if (index >= root->Size()) {
                         // array append new value
-                        if (is_insert && i + 1 == parsed_paths.size()) {
+                        if (is_insert && i + 1 == parsed_paths.size() &&
+                            !parsed_paths[i].is_reverse_index) {
                             root->PushBack(*value, mem_allocator);
                         }
                         return;
                     } else {
+                        if (parsed_paths[i].is_reverse_index) {
+                            index = root->Size() - index - 1;
+                        }
                         root = &((*root)[index]);
                     }
                 } else {
diff --git 
a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
 
b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
index fddcd249f4c..3e35a8ab2c9 100644
Binary files 
a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
 and 
b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out
 differ
diff --git 
a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
 
b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
index be4076020f0..e31d367139e 100644
--- 
a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy
@@ -61,6 +61,11 @@ suite("test_json_function", "arrow_flight_sql") {
     qt_sql "SELECT 
json_unquote('\"\\\\u0044\\\\u004F\\\\u0052\\\\u0049\\\\u0053\"');"
 
     qt_sql "SELECT json_extract('[1, 2, 3]', '\$.[1]');"
+    qt_array_last1 "SELECT json_extract('[1, 2, 3]', '\$[last]');"
+    qt_array_last2 "SELECT json_extract('[1, 2, 3]', '\$[last -1]');"
+    qt_array_last3 "SELECT json_extract('[1, 2, 3]', '\$[last- 2]');"
+    qt_array_last4 "SELECT json_extract('[1, 2, 3]', '\$[last - 2 ]');"
+    qt_array_last5 "SELECT json_extract('[1, 2, 3]', '\$[last - 3]');"
     qt_sql "SELECT json_extract('{\"id\": 123, \"name\": \"doris\"}', '\$.id', 
'\$.name');"
     qt_sql "SELECT json_extract('{\"id\": 123, \"name\": \"doris\"}', null, 
'\$.id');"
     qt_sql "SELECT json_extract(null, '\$.id');"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to