This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7e0157bf95f [feature](jsonb) json type support group by and distinct
(#57679)
7e0157bf95f is described below
commit 7e0157bf95f653ad1e17d04d3a5cd081de430d1c
Author: Mryange <[email protected]>
AuthorDate: Tue Nov 18 10:58:08 2025 +0800
[feature](jsonb) json type support group by and distinct (#57679)
### What problem does this PR solve?
https://github.com/apache/doris-website/pull/3045
```sql
mysql> SELECT * FROM test_jsonb_groupby;
+------+---------------+
| id | j |
+------+---------------+
| 1 | {"a":1,"b":2} |
| 2 | {"a":1,"b":3} |
| 3 | {"a":2,"b":2} |
| 4 | {"a":2,"b":2} |
| 5 | {"a":1,"b":2} |
| 6 | {"a":2,"b":2} |
+------+---------------+
6 rows in set (0.07 sec)
mysql> SELECT j, COUNT(*) FROM test_jsonb_groupby GROUP BY j;
+---------------+----------+
| j | COUNT(*) |
+---------------+----------+
| {"a":1,"b":3} | 1 |
| {"a":2,"b":2} | 3 |
| {"a":1,"b":2} | 2 |
+---------------+----------+
mysql> SELECT DISTINCT j FROM test_jsonb_groupby;
+---------------+
| j |
+---------------+
| {"a":1,"b":3} |
| {"a":2,"b":2} |
| {"a":1,"b":2} |
+---------------+
```
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [x] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [x] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [x] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
be/src/vec/common/hash_table/hash_key_type.h | 3 +-
.../json/json_group_by_and_distinct.out | 25 +++++
.../json/json_group_by_and_distinct.groovy | 112 +++++++++++++++++++++
3 files changed, 139 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/common/hash_table/hash_key_type.h
b/be/src/vec/common/hash_table/hash_key_type.h
index fe370059742..52d264371cb 100644
--- a/be/src/vec/common/hash_table/hash_key_type.h
+++ b/be/src/vec/common/hash_table/hash_key_type.h
@@ -103,7 +103,8 @@ inline HashKeyType get_hash_key_type(const
std::vector<vectorized::DataTypePtr>&
auto t = remove_nullable(data_types[0]);
// serialized cannot be used in the case of single column, because the
join operator will have some processing of column nullable, resulting in
incorrect serialized results.
if (!t->have_maximum_size_of_value()) {
- if (is_string_type(t->get_primitive_type()) || t->get_primitive_type()
== TYPE_ARRAY) {
+ if (is_string_type(t->get_primitive_type()) || t->get_primitive_type()
== TYPE_ARRAY ||
+ t->get_primitive_type() == TYPE_JSONB) {
return HashKeyType::string_key;
}
throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid type,
type={}", t->get_name());
diff --git
a/regression-test/data/datatype_p0/json/json_group_by_and_distinct.out
b/regression-test/data/datatype_p0/json/json_group_by_and_distinct.out
new file mode 100644
index 00000000000..e5f6b0a29f1
--- /dev/null
+++ b/regression-test/data/datatype_p0/json/json_group_by_and_distinct.out
@@ -0,0 +1,25 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !order --
+{"a":2,"b":2} 3
+{"a":1,"b":2} 2
+{"a":1,"b":3} 1
+
+-- !order --
+{"a":1,"b":2}
+{"a":1,"b":3}
+{"a":2,"b":2}
+
+-- !order --
+{"a":1,"b":2}
+{"b":2,"a":1}
+
+-- !order --
+{"a":1,"b":2} 2
+
+-- !order --
+123
+123
+
+-- !order --
+123 2
+
diff --git
a/regression-test/suites/datatype_p0/json/json_group_by_and_distinct.groovy
b/regression-test/suites/datatype_p0/json/json_group_by_and_distinct.groovy
new file mode 100644
index 00000000000..8ba0196e6a7
--- /dev/null
+++ b/regression-test/suites/datatype_p0/json/json_group_by_and_distinct.groovy
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_json_group_by_and_distinct", "p0") {
+
+
+ sql """
+ drop table if exists test_jsonb_groupby;
+ """
+ sql """
+ CREATE TABLE IF NOT EXISTS test_jsonb_groupby (
+ `id` INT ,
+ `j` jsonb
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ );
+ """
+
+
+ sql """
+ insert into test_jsonb_groupby values (1, '{"a":1, "b":2}'), (2,
'{"a":1, "b":3}'), (3, '{"a":2, "b":2}') , (4, '{"a":2, "b":2}') , (5, '{"a":1,
"b":2}') , (6, '{"a":2, "b":2}') ;
+ """
+
+ qt_order"""
+ select j, count(*) as cnt from test_jsonb_groupby group by j order by
cnt desc, cast(j as string);
+ """
+
+ qt_order"""
+ select distinct j from test_jsonb_groupby order by cast(j as string);
+ """
+
+
+ sql """
+ drop table if exists test_jsonb_obj;
+ """
+
+ sql """
+ CREATE TABLE IF NOT EXISTS test_jsonb_obj (
+ `id` INT ,
+ `j` jsonb
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ );
+ """
+
+ sql """
+ insert into test_jsonb_obj values (1,'{"a":1, "b":2}'), (2,'{"b":2,
"a":1}');
+ """
+
+ qt_order"""
+ select j from test_jsonb_obj group by j order by cast(j as string);
+ """
+
+ qt_order"""
+ select SORT_JSON_OBJECT_KEYS(j), count(*) from test_jsonb_obj group by
SORT_JSON_OBJECT_KEYS(j) order by cast(SORT_JSON_OBJECT_KEYS(j) as string);
+ """
+
+
+
+
+ sql """
+ drop table if exists test_jsonb_number;
+ """
+
+ sql """
+ CREATE TABLE IF NOT EXISTS test_jsonb_number (
+ `id` INT ,
+ `j` jsonb
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ );
+ """
+
+
+ sql """
+ insert into test_jsonb_number values (1,to_json( cast(123 as
bigint))), (2,to_json(cast(123 as tinyint)));
+ """
+
+ qt_order"""
+ select j from test_jsonb_number group by j order by cast(j as string);
+ """
+
+ qt_order"""
+ select NORMALIZE_JSON_NUMBERS_TO_DOUBLE(j), count(*) from
test_jsonb_number group by NORMALIZE_JSON_NUMBERS_TO_DOUBLE(j) order by
cast(NORMALIZE_JSON_NUMBERS_TO_DOUBLE(j) as string);
+ """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]