This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1deab37312d [Fix](Row store) fix row store with invalid json string in 
variant type (#39394)
1deab37312d is described below

commit 1deab37312dd44c528213e18ba5c61b471e0aa8a
Author: lihangyu <15605149...@163.com>
AuthorDate: Thu Aug 15 18:51:26 2024 +0800

    [Fix](Row store) fix row store with invalid json string in variant type 
(#39394)
    
    Previously, PR #37794 allowed invalid JSON text to be inserted into a
    variant column and stored it as a string type. However, when encoding the
    row store we CHECK that the JSON is valid and store it as a jsonb binary
    field, so such rows fail the check. This PR adds support for encoding
    invalid JSON text in the row store.
---
 .../data_types/serde/data_type_object_serde.cpp    | 30 ++++++++++++++++------
 .../data/variant_p0/variant_with_rowstore.out      |  3 +++
 .../suites/variant_p0/variant_with_rowstore.groovy | 18 +++++++++++++
 3 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/be/src/vec/data_types/serde/data_type_object_serde.cpp 
b/be/src/vec/data_types/serde/data_type_object_serde.cpp
index c19a5f18595..49efa8c829c 100644
--- a/be/src/vec/data_types/serde/data_type_object_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_object_serde.cpp
@@ -98,19 +98,33 @@ void DataTypeObjectSerDe::write_one_cell_to_jsonb(const 
IColumn& column, JsonbWr
     JsonbParser json_parser;
     // encode as jsonb
     bool succ = json_parser.parse(value_str.data(), value_str.size());
-    // maybe more graceful, it is ok to check here since data could be parsed
-    CHECK(succ);
-    result.writeStartBinary();
-    result.writeBinary(json_parser.getWriter().getOutput()->getBuffer(),
-                       json_parser.getWriter().getOutput()->getSize());
-    result.writeEndBinary();
+    if (!succ) {
+        // not a valid json insert raw text
+        result.writeStartString();
+        result.writeString(value_str.data(), value_str.size());
+        result.writeEndString();
+    } else {
+        // write a json binary
+        result.writeStartBinary();
+        result.writeBinary(json_parser.getWriter().getOutput()->getBuffer(),
+                           json_parser.getWriter().getOutput()->getSize());
+        result.writeEndBinary();
+    }
 }
 
 void DataTypeObjectSerDe::read_one_cell_from_jsonb(IColumn& column, const 
JsonbValue* arg) const {
     auto& variant = assert_cast<ColumnObject&>(column);
     Field field;
-    auto blob = static_cast<const JsonbBlobVal*>(arg);
-    field.assign_jsonb(blob->getBlob(), blob->getBlobLen());
+    if (arg->isBinary()) {
+        const auto* blob = static_cast<const JsonbBlobVal*>(arg);
+        field.assign_jsonb(blob->getBlob(), blob->getBlobLen());
+    } else if (arg->isString()) {
+        // not a valid jsonb type, insert as string
+        const auto* str = static_cast<const JsonbStringVal*>(arg);
+        field.assign_string(str->getBlob(), str->getBlobLen());
+    } else {
+        throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Invalid jsonb 
type");
+    }
     variant.insert(field);
 }
 
diff --git a/regression-test/data/variant_p0/variant_with_rowstore.out 
b/regression-test/data/variant_p0/variant_with_rowstore.out
index 6c34622bec8..763825b37a6 100644
--- a/regression-test/data/variant_p0/variant_with_rowstore.out
+++ b/regression-test/data/variant_p0/variant_with_rowstore.out
@@ -32,3 +32,6 @@
 -- !point_select --
 -1     {"a":1123}      {"a":1123}
 
+-- !sql --
+1      1|[""]
+
diff --git a/regression-test/suites/variant_p0/variant_with_rowstore.groovy 
b/regression-test/suites/variant_p0/variant_with_rowstore.groovy
index 771f776b3e7..d1946b8123c 100644
--- a/regression-test/suites/variant_p0/variant_with_rowstore.groovy
+++ b/regression-test/suites/variant_p0/variant_with_rowstore.groovy
@@ -108,4 +108,22 @@ suite("regression_test_variant_rowstore", "variant_type"){
         // stmt.setInt(1, -3)
         // qe_point_select stmt
     }
+
+    sql "DROP TABLE IF EXISTS table_rs_invalid_json"
+    sql """
+        CREATE TABLE table_rs_invalid_json
+        (
+            col0 BIGINT  NOT NULL,
+            coljson VARIANT NOT NULL, INDEX colvariant_idx(coljson) USING 
INVERTED
+        )
+        UNIQUE KEY(col0)
+        DISTRIBUTED BY HASH(col0) BUCKETS 4
+        PROPERTIES (
+            "enable_unique_key_merge_on_write" = "true",
+            "store_row_column"="true",
+            "replication_num" = "1"
+        );
+    """
+    sql """insert into table_rs_invalid_json values (1, '1|[""]')"""
+    qt_sql "select * from table_rs_invalid_json where col0 = 1"
 }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to