xiaokang commented on code in PR #19185:
URL: https://github.com/apache/doris/pull/19185#discussion_r1192152539


##########
be/src/util/jsonb_document.h:
##########
@@ -213,6 +226,109 @@ class JsonbDocument {
     char payload_[0];
 };
 
+/// A simple input stream class for the JSON path parser.
+class Stream {
+public:
+    /// Creates an input stream reading from a character string.
+    /// @param string  the input string
+    /// @param length  the length of the input string
+    Stream(const char* string, size_t length) : m_position(string), 
m_end(string + length) {}
+
+    /// Returns a pointer to the current position in the stream.
+    const char* position() const { return m_position; }
+
+    /// Returns a pointer to the position just after the end of the stream.
+    const char* end() const { return m_end; }
+
+    /// Returns the number of bytes remaining in the stream.
+    size_t remaining() const {
+        assert(m_position <= m_end);
+        return m_end - m_position;
+    }
+
+    /// Tells if the stream has been exhausted.
+    bool exhausted() const { return remaining() == 0; }
+
+    /// Reads the next byte from the stream and moves the position forward.
+    char read() {
+        assert(!exhausted());
+        return *m_position++;
+    }
+
+    /// Reads the next byte from the stream without moving the position 
forward.
+    char peek() const {
+        assert(!exhausted());
+        return *m_position;
+    }
+
+    /// Moves the position to the next non-whitespace character.
+    void skip_whitespace() {
+        m_position = std::find_if_not(m_position, m_end, [](char c) { return 
std::isspace(c); });
+    }
+
+    /// Moves the position n bytes forward.
+    void skip(size_t n) {
+        assert(remaining() >= n);
+        m_position += n;
+        skip_whitespace();
+    }
+
+    void clear_legPtr() { legPtr = nullptr; }
+
+    void set_legPtr(char* ptr) {
+        clear_legPtr();
+        legPtr = ptr;
+    }
+
+    char* get_legPtr() { return legPtr; }
+
+    void clear_legLen() { legLen = 0; }
+
+    void add_legLen() { legLen++; }
+
+    unsigned int get_legLen() { return legLen; }
+
+    void remove_escapes() {
+        int new_len = 0;
+        for (int i = 0; i < legLen; i++) {
+            if (legPtr[i] != '\\') {
+                legPtr[new_len++] = legPtr[i];
+            }
+        }
+        legPtr[new_len] = '\0';
+        legLen = new_len;
+    }
+
+    void set_hasEscapes(bool has) { hasEscapes = has; }
+
+    bool get_hasEscapes() { return hasEscapes; }
+
+private:
+    /// The current position in the stream.
+    const char* m_position;
+
+    /// The end of the stream.
+    const char* const m_end;
+
+    ///path leg ptr
+    char* legPtr;

Review Comment:
   Rename this member to `leg_ptr` (snake_case).



##########
be/src/util/jsonb_document.h:
##########
@@ -1072,103 +1186,165 @@ inline const char* JsonbValue::getValuePtr() const {
 }
 
 inline JsonbValue* JsonbValue::findPath(const char* key_path, unsigned int 
kp_len,
-                                        const char* delim = ".", hDictFind 
handler = nullptr) {
+                                        hDictFind handler = nullptr) {
     if (!key_path) return nullptr;
     if (kp_len == 0) return this;
+    Stream stream(key_path, kp_len);
+    stream.skip_whitespace();
+    if (stream.exhausted() || stream.read() != SCOPE) return nullptr;
 
-    // skip $ and . at beginning
-    if (kp_len > 0 && *key_path == '$') {
-        key_path++;
-        kp_len--;
-        if (kp_len > 0 && *key_path == '.') {
-            key_path++;
-            kp_len--;
-        }
-    }
+    JsonbValue* pval = this;
 
-    if (kp_len == 0) return this;
+    while (pval && !stream.exhausted()) {
+        stream.skip_whitespace();
+        stream.clear_legPtr();
+        stream.clear_legLen();
 
-    if (!delim) delim = "."; // default delimiter
+        if (!JsonbPath::parsePath(&stream, pval)) {
+            return nullptr;
+        }
 
-    JsonbValue* pval = this;
-    const char* fence = key_path + kp_len;
-    char idx_buf[21]; // buffer to parse array index (integer value)
-
-    while (pval && key_path < fence) {
-        const char* key = key_path;
-        unsigned int klen = 0;
-        const char* left_bracket = nullptr;
-        const char* right_bracket = nullptr;
-        size_t idx_len = 0;
-        // find the current key and [] bracket position
-        for (; key_path != fence && *key_path != *delim; ++key_path, ++klen) {
-            if ('[' == *key_path) {
-                left_bracket = key_path;
-            } else if (']' == *key_path) {
-                right_bracket = key_path;
-            }
+        if (stream.get_legLen() == 0) {
+            return nullptr;
         }
 
-        // check brackets and array index length
-        if (left_bracket || right_bracket) {
-            if (!left_bracket || !right_bracket) {
-                return nullptr;
+        if (LIKELY(pval->type_ == JsonbType::T_Object)) {
+            if (stream.get_legLen() == 1 && *stream.get_legPtr() == WILDCARD) {
+                return pval;
+            } else if (stream.get_hasEscapes()) {
+                stream.remove_escapes();
             }
-            // check the last char is ]
-            if (key + klen - 1 != right_bracket) {
-                return nullptr;
+
+            pval = ((ObjectVal*)pval)->find(stream.get_legPtr(), 
stream.get_legLen(), handler);
+
+            if (!pval) return nullptr;
+        } else if (LIKELY(pval->type_ == JsonbType::T_Array)) {
+            int index = 0;
+            std::string idx_string(stream.get_legPtr(), stream.get_legLen());

Review Comment:
   Try to use `std::string_view` here to avoid the memory copy.



##########
be/src/util/jsonb_document.h:
##########
@@ -213,6 +226,109 @@ class JsonbDocument {
     char payload_[0];
 };
 
+/// A simple input stream class for the JSON path parser.
+class Stream {
+public:
+    /// Creates an input stream reading from a character string.
+    /// @param string  the input string
+    /// @param length  the length of the input string
+    Stream(const char* string, size_t length) : m_position(string), 
m_end(string + length) {}
+
+    /// Returns a pointer to the current position in the stream.
+    const char* position() const { return m_position; }
+
+    /// Returns a pointer to the position just after the end of the stream.
+    const char* end() const { return m_end; }
+
+    /// Returns the number of bytes remaining in the stream.
+    size_t remaining() const {
+        assert(m_position <= m_end);
+        return m_end - m_position;
+    }
+
+    /// Tells if the stream has been exhausted.
+    bool exhausted() const { return remaining() == 0; }
+
+    /// Reads the next byte from the stream and moves the position forward.
+    char read() {
+        assert(!exhausted());
+        return *m_position++;
+    }
+
+    /// Reads the next byte from the stream without moving the position 
forward.
+    char peek() const {
+        assert(!exhausted());
+        return *m_position;
+    }
+
+    /// Moves the position to the next non-whitespace character.
+    void skip_whitespace() {
+        m_position = std::find_if_not(m_position, m_end, [](char c) { return 
std::isspace(c); });
+    }
+
+    /// Moves the position n bytes forward.
+    void skip(size_t n) {
+        assert(remaining() >= n);
+        m_position += n;
+        skip_whitespace();
+    }
+
+    void clear_legPtr() { legPtr = nullptr; }
+
+    void set_legPtr(char* ptr) {
+        clear_legPtr();
+        legPtr = ptr;
+    }
+
+    char* get_legPtr() { return legPtr; }
+
+    void clear_legLen() { legLen = 0; }
+
+    void add_legLen() { legLen++; }
+
+    unsigned int get_legLen() { return legLen; }
+
+    void remove_escapes() {
+        int new_len = 0;
+        for (int i = 0; i < legLen; i++) {
+            if (legPtr[i] != '\\') {
+                legPtr[new_len++] = legPtr[i];
+            }
+        }
+        legPtr[new_len] = '\0';
+        legLen = new_len;
+    }
+
+    void set_hasEscapes(bool has) { hasEscapes = has; }
+
+    bool get_hasEscapes() { return hasEscapes; }
+
+private:
+    /// The current position in the stream.
+    const char* m_position;
+
+    /// The end of the stream.
+    const char* const m_end;
+
+    ///path leg ptr
+    char* legPtr;
+
+    ///path leg len
+    unsigned int legLen;

Review Comment:
   Rename this member to `leg_len` (snake_case).



##########
be/src/util/jsonb_document.h:
##########
@@ -213,6 +226,109 @@ class JsonbDocument {
     char payload_[0];
 };
 
+/// A simple input stream class for the JSON path parser.
+class Stream {
+public:
+    /// Creates an input stream reading from a character string.
+    /// @param string  the input string
+    /// @param length  the length of the input string
+    Stream(const char* string, size_t length) : m_position(string), 
m_end(string + length) {}
+
+    /// Returns a pointer to the current position in the stream.
+    const char* position() const { return m_position; }
+
+    /// Returns a pointer to the position just after the end of the stream.
+    const char* end() const { return m_end; }
+
+    /// Returns the number of bytes remaining in the stream.
+    size_t remaining() const {
+        assert(m_position <= m_end);
+        return m_end - m_position;
+    }
+
+    /// Tells if the stream has been exhausted.
+    bool exhausted() const { return remaining() == 0; }
+
+    /// Reads the next byte from the stream and moves the position forward.
+    char read() {
+        assert(!exhausted());
+        return *m_position++;
+    }
+
+    /// Reads the next byte from the stream without moving the position 
forward.
+    char peek() const {
+        assert(!exhausted());
+        return *m_position;
+    }
+
+    /// Moves the position to the next non-whitespace character.
+    void skip_whitespace() {
+        m_position = std::find_if_not(m_position, m_end, [](char c) { return 
std::isspace(c); });
+    }
+
+    /// Moves the position n bytes forward.
+    void skip(size_t n) {
+        assert(remaining() >= n);
+        m_position += n;
+        skip_whitespace();
+    }
+
+    void clear_legPtr() { legPtr = nullptr; }
+
+    void set_legPtr(char* ptr) {
+        clear_legPtr();
+        legPtr = ptr;
+    }
+
+    char* get_legPtr() { return legPtr; }
+
+    void clear_legLen() { legLen = 0; }
+
+    void add_legLen() { legLen++; }
+
+    unsigned int get_legLen() { return legLen; }
+
+    void remove_escapes() {
+        int new_len = 0;
+        for (int i = 0; i < legLen; i++) {
+            if (legPtr[i] != '\\') {
+                legPtr[new_len++] = legPtr[i];
+            }
+        }
+        legPtr[new_len] = '\0';
+        legLen = new_len;
+    }
+
+    void set_hasEscapes(bool has) { hasEscapes = has; }
+
+    bool get_hasEscapes() { return hasEscapes; }
+
+private:
+    /// The current position in the stream.
+    const char* m_position;
+
+    /// The end of the stream.
+    const char* const m_end;
+
+    ///path leg ptr
+    char* legPtr;
+
+    ///path leg len
+    unsigned int legLen;
+
+    ///
+    bool hasEscapes = false;

Review Comment:
   Rename this member to `has_escapes` (snake_case).



##########
be/src/util/jsonb_document.h:
##########
@@ -1072,103 +1186,165 @@ inline const char* JsonbValue::getValuePtr() const {
 }
 
 inline JsonbValue* JsonbValue::findPath(const char* key_path, unsigned int 
kp_len,
-                                        const char* delim = ".", hDictFind 
handler = nullptr) {
+                                        hDictFind handler = nullptr) {
     if (!key_path) return nullptr;
     if (kp_len == 0) return this;
+    Stream stream(key_path, kp_len);
+    stream.skip_whitespace();
+    if (stream.exhausted() || stream.read() != SCOPE) return nullptr;
 
-    // skip $ and . at beginning
-    if (kp_len > 0 && *key_path == '$') {
-        key_path++;
-        kp_len--;
-        if (kp_len > 0 && *key_path == '.') {
-            key_path++;
-            kp_len--;
-        }
-    }
+    JsonbValue* pval = this;
 
-    if (kp_len == 0) return this;
+    while (pval && !stream.exhausted()) {
+        stream.skip_whitespace();
+        stream.clear_legPtr();
+        stream.clear_legLen();
 
-    if (!delim) delim = "."; // default delimiter
+        if (!JsonbPath::parsePath(&stream, pval)) {
+            return nullptr;
+        }
 
-    JsonbValue* pval = this;
-    const char* fence = key_path + kp_len;
-    char idx_buf[21]; // buffer to parse array index (integer value)
-
-    while (pval && key_path < fence) {
-        const char* key = key_path;
-        unsigned int klen = 0;
-        const char* left_bracket = nullptr;
-        const char* right_bracket = nullptr;
-        size_t idx_len = 0;
-        // find the current key and [] bracket position
-        for (; key_path != fence && *key_path != *delim; ++key_path, ++klen) {
-            if ('[' == *key_path) {
-                left_bracket = key_path;
-            } else if (']' == *key_path) {
-                right_bracket = key_path;
-            }
+        if (stream.get_legLen() == 0) {
+            return nullptr;
         }
 
-        // check brackets and array index length
-        if (left_bracket || right_bracket) {
-            if (!left_bracket || !right_bracket) {
-                return nullptr;
+        if (LIKELY(pval->type_ == JsonbType::T_Object)) {
+            if (stream.get_legLen() == 1 && *stream.get_legPtr() == WILDCARD) {
+                return pval;
+            } else if (stream.get_hasEscapes()) {
+                stream.remove_escapes();
             }
-            // check the last char is ]
-            if (key + klen - 1 != right_bracket) {
-                return nullptr;
+
+            pval = ((ObjectVal*)pval)->find(stream.get_legPtr(), 
stream.get_legLen(), handler);
+
+            if (!pval) return nullptr;
+        } else if (LIKELY(pval->type_ == JsonbType::T_Array)) {
+            int index = 0;
+            std::string idx_string(stream.get_legPtr(), stream.get_legLen());
+
+            if (stream.get_legLen() == 1 && *stream.get_legPtr() == WILDCARD) {
+                return pval;
+            } else if (std::string(stream.get_legPtr(), 4) == LAST) {
+                auto pos = idx_string.find(MINUS);

Review Comment:
   Is this possible, given the `== LAST` check above?



##########
be/src/util/jsonb_document.h:
##########
@@ -1072,103 +1186,165 @@ inline const char* JsonbValue::getValuePtr() const {
 }
 
 inline JsonbValue* JsonbValue::findPath(const char* key_path, unsigned int 
kp_len,
-                                        const char* delim = ".", hDictFind 
handler = nullptr) {
+                                        hDictFind handler = nullptr) {
     if (!key_path) return nullptr;
     if (kp_len == 0) return this;
+    Stream stream(key_path, kp_len);
+    stream.skip_whitespace();
+    if (stream.exhausted() || stream.read() != SCOPE) return nullptr;
 
-    // skip $ and . at beginning
-    if (kp_len > 0 && *key_path == '$') {
-        key_path++;
-        kp_len--;
-        if (kp_len > 0 && *key_path == '.') {
-            key_path++;
-            kp_len--;
-        }
-    }
+    JsonbValue* pval = this;
 
-    if (kp_len == 0) return this;
+    while (pval && !stream.exhausted()) {
+        stream.skip_whitespace();
+        stream.clear_legPtr();
+        stream.clear_legLen();
 
-    if (!delim) delim = "."; // default delimiter
+        if (!JsonbPath::parsePath(&stream, pval)) {
+            return nullptr;
+        }
 
-    JsonbValue* pval = this;
-    const char* fence = key_path + kp_len;
-    char idx_buf[21]; // buffer to parse array index (integer value)
-
-    while (pval && key_path < fence) {
-        const char* key = key_path;
-        unsigned int klen = 0;
-        const char* left_bracket = nullptr;
-        const char* right_bracket = nullptr;
-        size_t idx_len = 0;
-        // find the current key and [] bracket position
-        for (; key_path != fence && *key_path != *delim; ++key_path, ++klen) {
-            if ('[' == *key_path) {
-                left_bracket = key_path;
-            } else if (']' == *key_path) {
-                right_bracket = key_path;
-            }
+        if (stream.get_legLen() == 0) {
+            return nullptr;
         }
 
-        // check brackets and array index length
-        if (left_bracket || right_bracket) {
-            if (!left_bracket || !right_bracket) {
-                return nullptr;
+        if (LIKELY(pval->type_ == JsonbType::T_Object)) {
+            if (stream.get_legLen() == 1 && *stream.get_legPtr() == WILDCARD) {
+                return pval;
+            } else if (stream.get_hasEscapes()) {
+                stream.remove_escapes();
             }
-            // check the last char is ]
-            if (key + klen - 1 != right_bracket) {
-                return nullptr;
+
+            pval = ((ObjectVal*)pval)->find(stream.get_legPtr(), 
stream.get_legLen(), handler);
+
+            if (!pval) return nullptr;
+        } else if (LIKELY(pval->type_ == JsonbType::T_Array)) {
+            int index = 0;
+            std::string idx_string(stream.get_legPtr(), stream.get_legLen());
+
+            if (stream.get_legLen() == 1 && *stream.get_legPtr() == WILDCARD) {
+                return pval;
+            } else if (std::string(stream.get_legPtr(), 4) == LAST) {
+                auto pos = idx_string.find(MINUS);
+
+                if (pos != std::string::npos) {
+                    idx_string = idx_string.substr(pos + 1);
+                    size_t num = ((ArrayVal*)pval)->numElem();
+                    if (std::stoi(idx_string) > num) {
+                        return nullptr; //invalid json path
+                    }
+                    index = num - 1 - std::stoi(idx_string);
+                } else if (stream.get_legLen() == 4) {
+                    index = ((ArrayVal*)pval)->numElem() - 1;
+                } else {
+                    return nullptr; //invalid json path
+                }
+            } else {
+                std::string::size_type pos;
+                index = std::stoi(idx_string, &pos, 10);

Review Comment:
   Please handle a negative index here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to