(doris) branch master updated: [feature](jsonb) json type support group by and distinct (#57679)

yiguolei Mon, 17 Nov 2025 18:58:21 -0800

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 7e0157bf95f [feature](jsonb) json type support group by and distinct 
(#57679)
7e0157bf95f is described below

commit 7e0157bf95f653ad1e17d04d3a5cd081de430d1c
Author: Mryange <[email protected]>
AuthorDate: Tue Nov 18 10:58:08 2025 +0800

    [feature](jsonb) json type support group by and distinct (#57679)
    
    ### What problem does this PR solve?
    
    https://github.com/apache/doris-website/pull/3045
    
    ```sql
    mysql> SELECT * FROM test_jsonb_groupby;
    +------+---------------+
    | id   | j             |
    +------+---------------+
    |    1 | {"a":1,"b":2} |
    |    2 | {"a":1,"b":3} |
    |    3 | {"a":2,"b":2} |
    |    4 | {"a":2,"b":2} |
    |    5 | {"a":1,"b":2} |
    |    6 | {"a":2,"b":2} |
    +------+---------------+
    6 rows in set (0.07 sec)
    
    mysql> SELECT j, COUNT(*) FROM test_jsonb_groupby GROUP BY j;
    +---------------+----------+
    | j             | COUNT(*) |
    +---------------+----------+
    | {"a":1,"b":3} |        1 |
    | {"a":2,"b":2} |        3 |
    | {"a":1,"b":2} |        2 |
    +---------------+----------+
    
    mysql> SELECT DISTINCT j FROM test_jsonb_groupby;
    +---------------+
    | j             |
    +---------------+
    | {"a":1,"b":3} |
    | {"a":2,"b":2} |
    | {"a":1,"b":2} |
    +---------------+
    ```
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [x] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [x] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [x] Yes. <!-- Add document PR link here. eg: https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/vec/common/hash_table/hash_key_type.h       |   3 +-
 .../json/json_group_by_and_distinct.out            |  25 +++++
 .../json/json_group_by_and_distinct.groovy         | 112 +++++++++++++++++++++
 3 files changed, 139 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/common/hash_table/hash_key_type.h 
b/be/src/vec/common/hash_table/hash_key_type.h
index fe370059742..52d264371cb 100644
--- a/be/src/vec/common/hash_table/hash_key_type.h
+++ b/be/src/vec/common/hash_table/hash_key_type.h
@@ -103,7 +103,8 @@ inline HashKeyType get_hash_key_type(const 
std::vector<vectorized::DataTypePtr>&
     auto t = remove_nullable(data_types[0]);
     // serialized cannot be used in the case of single column, because the 
join operator will have some processing of column nullable, resulting in 
incorrect serialized results.
     if (!t->have_maximum_size_of_value()) {
-        if (is_string_type(t->get_primitive_type()) || t->get_primitive_type() 
== TYPE_ARRAY) {
+        if (is_string_type(t->get_primitive_type()) || t->get_primitive_type() 
== TYPE_ARRAY ||
+            t->get_primitive_type() == TYPE_JSONB) {
             return HashKeyType::string_key;
         }
         throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type, 
type={}", t->get_name());
diff --git 
a/regression-test/data/datatype_p0/json/json_group_by_and_distinct.out 
b/regression-test/data/datatype_p0/json/json_group_by_and_distinct.out
new file mode 100644
index 00000000000..e5f6b0a29f1
--- /dev/null
+++ b/regression-test/data/datatype_p0/json/json_group_by_and_distinct.out
@@ -0,0 +1,25 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !order --
+{"a":2,"b":2}  3
+{"a":1,"b":2}  2
+{"a":1,"b":3}  1
+
+-- !order --
+{"a":1,"b":2}
+{"a":1,"b":3}
+{"a":2,"b":2}
+
+-- !order --
+{"a":1,"b":2}
+{"b":2,"a":1}
+
+-- !order --
+{"a":1,"b":2}  2
+
+-- !order --
+123
+123
+
+-- !order --
+123    2
+
diff --git 
a/regression-test/suites/datatype_p0/json/json_group_by_and_distinct.groovy 
b/regression-test/suites/datatype_p0/json/json_group_by_and_distinct.groovy
new file mode 100644
index 00000000000..8ba0196e6a7
--- /dev/null
+++ b/regression-test/suites/datatype_p0/json/json_group_by_and_distinct.groovy
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_json_group_by_and_distinct", "p0") {
+    
+
+    sql """
+        drop table if exists test_jsonb_groupby;
+    """
+    sql """
+    CREATE TABLE IF NOT EXISTS test_jsonb_groupby (
+              `id` INT ,
+              `j` jsonb
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+    );
+    """
+
+
+    sql """
+        insert into test_jsonb_groupby values (1, '{"a":1, "b":2}'), (2, 
'{"a":1, "b":3}'), (3, '{"a":2, "b":2}') , (4, '{"a":2, "b":2}') , (5, '{"a":1, 
"b":2}') , (6, '{"a":2, "b":2}') ;
+    """
+
+    qt_order"""
+        select j, count(*) as cnt from test_jsonb_groupby group by j order by 
cnt desc, cast(j as string);
+    """
+
+    qt_order"""
+        select distinct j from test_jsonb_groupby order by cast(j as string);
+    """
+
+
+    sql """
+        drop table if exists test_jsonb_obj;
+    """
+
+    sql """
+         CREATE TABLE IF NOT EXISTS test_jsonb_obj (
+              `id` INT ,
+              `j` jsonb
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+        );
+    """
+
+    sql """
+        insert into test_jsonb_obj values (1,'{"a":1, "b":2}'), (2,'{"b":2, 
"a":1}');
+    """
+
+    qt_order"""
+        select j from test_jsonb_obj group by j order by cast(j as string);
+    """
+
+    qt_order"""
+        select SORT_JSON_OBJECT_KEYS(j), count(*) from test_jsonb_obj group by 
SORT_JSON_OBJECT_KEYS(j) order by cast(SORT_JSON_OBJECT_KEYS(j) as string);
+    """
+
+
+
+
+    sql """
+        drop table if exists test_jsonb_number;
+    """
+
+    sql """
+         CREATE TABLE IF NOT EXISTS test_jsonb_number (
+              `id` INT ,
+              `j` jsonb
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "storage_format" = "V2"
+        );
+    """
+
+
+    sql """
+        insert into test_jsonb_number values (1,to_json( cast(123 as 
bigint))), (2,to_json(cast(123 as tinyint)));
+    """
+
+    qt_order"""
+        select j from test_jsonb_number group by j order by cast(j as string);
+    """
+
+    qt_order"""
+        select NORMALIZE_JSON_NUMBERS_TO_DOUBLE(j), count(*) from 
test_jsonb_number group by NORMALIZE_JSON_NUMBERS_TO_DOUBLE(j) order by 
cast(NORMALIZE_JSON_NUMBERS_TO_DOUBLE(j) as string);
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch master updated: [feature](jsonb) json type support group by and distinct (#57679)

Reply via email to