This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 051128f172a  [test](doc) add table design example in doris's doc to regression test (#42444)
051128f172a is described below

commit 051128f172a0e6c26109160752293d95746e29f8
Author: yagagagaga <zhangminkefromflyd...@gmail.com>
AuthorDate: Wed Oct 30 19:32:23 2024 +0800

    [test](doc) add table design example in doris's doc to regression test (#42444)
---
 .../org/apache/doris/regression/suite/Suite.groovy |  22 ++
 .../doc/table-design/auto-increment.md.groovy      | 296 ++++++++++++++++++++
 .../doc/table-design/best-practice.md.groovy       | 193 +++++++++++++
 .../table-design/data-model/aggregate.md.groovy    |  89 ++++++
 .../table-design/data-model/duplicate.md.groovy    |  77 +++++
 .../doc/table-design/data-model/unique.md.groovy   |  63 +++++
 .../doc/table-design/data-partition.md.groovy      | 311 +++++++++++++++++++++
 .../doc/table-design/index/bloomfilter.md.groovy   |  43 +++
 .../table-design/index/inverted-index.md.groovy    | 122 ++++++++
 .../index/ngram-bloomfilter-index.md.groovy        |  80 ++++++
 .../doc/table-design/index/prefix-index.md.groovy  |  50 ++++
 .../suites/doc/table-design/row-store.md.groovy    |  48 ++++
 .../doc/table-design/schema-change.md.groovy       | 211 ++++++++++++++
 .../suites/doc/table-design/test_data/test.csv     |   2 +
 14 files changed, 1607 insertions(+)

diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
index 997c6c7c66f..6dda050fbf3 100644
--- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
+++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
@@ -971,6 +971,28 @@ class Suite implements GroovyInterceptable {
         Assert.assertEquals(0, code)
     }
 
+    String cmd(String cmd, int timeoutSecond = 0) {
+        var processBuilder = new ProcessBuilder()
+        processBuilder.command("/bin/bash", "-c", cmd)
+        var process = processBuilder.start()
+        def outBuf = new StringBuilder()
+        def errBuf = new StringBuilder()
+        // consume stdout and stderr asynchronously so the child process
+        // cannot block on a full pipe; the streams must not be read again
+        process.consumeProcessOutput(outBuf, errBuf)
+        // wait until cmd finishes
+        if (timeoutSecond > 0) {
+            process.waitForOrKill(timeoutSecond * 1000)
+        } else {
+            process.waitFor()
+        }
+        println outBuf
+        if (process.exitValue() != 0) {
+            throw new RuntimeException(errBuf.toString())
+        }
+        return outBuf.toString()
+    }
+
     void sshExec(String username, String host, String cmd, boolean alert=true) {
         String command = "ssh ${username}@${host} '${cmd}'"
         def cmds = ["/bin/bash", "-c", command]
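For context, a minimal usage sketch of the new `cmd` helper (the echoed string and the 10-second timeout are illustrative only, not part of the commit):

    // run a shell command through the helper added above; it executes via
    // `/bin/bash -c`, kills the child after 10s if it hangs, prints the
    // captured stdout, and throws on a non-zero exit code
    def out = cmd("echo hello", 10)
    assert out.contains("hello")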
diff --git a/regression-test/suites/doc/table-design/auto-increment.md.groovy b/regression-test/suites/doc/table-design/auto-increment.md.groovy
new file mode 100644
index 00000000000..f78d53c1b13
--- /dev/null
+++ b/regression-test/suites/doc/table-design/auto-increment.md.groovy
@@ -0,0 +1,296 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.jupiter.api.Assertions;
+
+suite("docs/table-design/auto-increment.md") {
+    try {
+        multi_sql "create database if not exists demo; use demo;"
+        sql "drop table if exists `demo`.`tbl`"
+        sql """
+            CREATE TABLE `demo`.`tbl` (
+                `id` BIGINT NOT NULL AUTO_INCREMENT,
+                `value` BIGINT NOT NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 10
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+
+        sql "drop table if exists `demo`.`tbl`"
+        sql """
+            CREATE TABLE `demo`.`tbl` (
+                `id` BIGINT NOT NULL AUTO_INCREMENT(100),
+                `value` BIGINT NOT NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 10
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+
+        sql "drop table if exists `demo`.`tbl`"
+        sql """
+            CREATE TABLE `demo`.`tbl` (
+                `uid` BIGINT NOT NULL,
+                `name` BIGINT NOT NULL,
+                `id` BIGINT NOT NULL AUTO_INCREMENT,
+                `value` BIGINT NOT NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`uid`, `name`)
+            DISTRIBUTED BY HASH(`uid`) BUCKETS 10
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+
+        sql "drop table if exists `demo`.`tbl`"
+        sql """
+            CREATE TABLE `demo`.`tbl` (
+                `id` BIGINT NOT NULL AUTO_INCREMENT,
+                `name` varchar(65533) NOT NULL,
+                `value` int(11) NOT NULL
+            ) ENGINE=OLAP
+            UNIQUE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 10
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+
+        sql "drop table if exists `demo`.`tbl`"
+        sql """
+            CREATE TABLE `demo`.`tbl` (
+                `text` varchar(65533) NOT NULL,
+                `id` BIGINT NOT NULL AUTO_INCREMENT,
+            ) ENGINE=OLAP
+            UNIQUE KEY(`text`)
+            DISTRIBUTED BY HASH(`text`) BUCKETS 10
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+
+        sql "drop table if exists `demo`.`tbl`"
+        sql """
+            CREATE TABLE `demo`.`tbl` (
+                `text` varchar(65533) NOT NULL,
+                `id` BIGINT NOT NULL AUTO_INCREMENT,
+            ) ENGINE=OLAP
+            UNIQUE KEY(`text`)
+            DISTRIBUTED BY HASH(`text`) BUCKETS 10
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+
+        sql "drop table if exists `demo`.`tbl`"
+        sql """
+            CREATE TABLE `demo`.`tbl` (
+                `id` BIGINT NOT NULL AUTO_INCREMENT,
+                `name` varchar(65533) NOT NULL,
+                `value` int(11) NOT NULL
+            ) ENGINE=OLAP
+            UNIQUE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 10
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+        multi_sql """
+            insert into tbl(name, value) values("Bob", 10), ("Alice", 20), ("Jack", 30);
+            select * from tbl order by id;
+        """
+
+        cmd """
+            curl --location-trusted -u ${context.config.jdbcUser}:${context.config.jdbcPassword} -H "columns:name,value" -H "column_separator:," -T ${context.file.parent}/test_data/test.csv http://${context.config.feHttpAddress}/api/demo/tbl/_stream_load
+        """
+        sql "select * from tbl order by id"
+
+        multi_sql """
+            insert into tbl(id, name, value) values(null, "Doris", 60), (null, "Nereids", 70);
+            select * from tbl order by id;
+        """
+
+        sql "drop table if exists `demo`.`tbl2`"
+        multi_sql """
+            CREATE TABLE `demo`.`tbl2` (
+                `id` BIGINT NOT NULL AUTO_INCREMENT,
+                `name` varchar(65533) NOT NULL,
+                `value` int(11) NOT NULL DEFAULT "0"
+            ) ENGINE=OLAP
+            UNIQUE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 10
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1",
+                "enable_unique_key_merge_on_write" = "true"
+            );
+
+            insert into tbl2(id, name, value) values(1, "Bob", 10), (2, "Alice", 20), (3, "Jack", 30);
+            select * from tbl2 order by id;
+
+            set enable_unique_key_partial_update=true;
+            set enable_insert_strict=false;
+            insert into tbl2(id, name) values(1, "modified"), (4, "added");
+
+            select * from tbl2 order by id;
+        """
+
+        sql "drop table if exists `demo`.`tbl3`"
+        multi_sql """
+            CREATE TABLE `demo`.`tbl3` (
+                `id` BIGINT NOT NULL,
+                `name` varchar(100) NOT NULL,
+                `score` BIGINT NOT NULL,
+                `aid` BIGINT NOT NULL AUTO_INCREMENT
+            ) ENGINE=OLAP
+            UNIQUE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 1
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1",
+                "enable_unique_key_merge_on_write" = "true"
+            );
+
+            insert into tbl3(id, name, score) values(1, "Doris", 100), (2, "Nereids", 200), (3, "Bob", 300);
+            select * from tbl3 order by id;
+
+            set enable_unique_key_partial_update=true;
+            set enable_insert_strict=false;
+            insert into tbl3(id, score) values(1, 999), (2, 888);
+            select * from tbl3 order by id;
+
+            insert into tbl3(id, aid) values(1, 1000), (3, 500);
+            select * from tbl3 order by id;
+        """
+
+        sql "drop table if exists `demo`.`dwd_dup_tbl`"
+        sql """
+            CREATE TABLE `demo`.`dwd_dup_tbl` (
+                `user_id` varchar(50) NOT NULL,
+                `dim1` varchar(50) NOT NULL,
+                `dim2` varchar(50) NOT NULL,
+                `dim3` varchar(50) NOT NULL,
+                `dim4` varchar(50) NOT NULL,
+                `dim5` varchar(50) NOT NULL,
+                `visit_time` DATE NOT NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`user_id`)
+            DISTRIBUTED BY HASH(`user_id`) BUCKETS 32
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+        sql "drop table if exists `demo`.`dictionary_tbl`"
+        sql """
+            CREATE TABLE `demo`.`dictionary_tbl` (
+                `user_id` varchar(50) NOT NULL,
+                `aid` BIGINT NOT NULL AUTO_INCREMENT
+            ) ENGINE=OLAP
+            UNIQUE KEY(`user_id`)
+            DISTRIBUTED BY HASH(`user_id`) BUCKETS 32
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1",
+                "enable_unique_key_merge_on_write" = "true"
+            )
+        """
+        sql """
+            insert into dictionary_tbl(user_id)
+            select user_id from dwd_dup_tbl group by user_id
+        """
+        sql """
+            insert into dictionary_tbl(user_id)
+            select dwd_dup_tbl.user_id from dwd_dup_tbl left join dictionary_tbl
+            on dwd_dup_tbl.user_id = dictionary_tbl.user_id where dwd_dup_tbl.visit_time > '2023-12-10' and dictionary_tbl.user_id is NULL
+        """
+        sql "drop table if exists `demo`.`dws_agg_tbl`"
+        sql """
+            CREATE TABLE `demo`.`dws_agg_tbl` (
+                `dim1` varchar(50) NOT NULL,
+                `dim3` varchar(50) NOT NULL,
+                `dim5` varchar(50) NOT NULL,
+                `user_id_bitmap` BITMAP BITMAP_UNION NOT NULL,
+                `pv` BIGINT SUM NOT NULL
+            ) ENGINE=OLAP
+            AGGREGATE KEY(`dim1`,`dim3`,`dim5`)
+            DISTRIBUTED BY HASH(`dim1`) BUCKETS 32
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+        sql """
+            insert into dws_agg_tbl
+            select dwd_dup_tbl.dim1, dwd_dup_tbl.dim3, dwd_dup_tbl.dim5, BITMAP_UNION(TO_BITMAP(dictionary_tbl.aid)), COUNT(1)
+            from dwd_dup_tbl INNER JOIN dictionary_tbl on dwd_dup_tbl.user_id = dictionary_tbl.user_id
+            group by dwd_dup_tbl.dim1, dwd_dup_tbl.dim3, dwd_dup_tbl.dim5
+        """
+        sql """
+            select dim1, dim3, dim5, bitmap_count(user_id_bitmap) as uv, pv from dws_agg_tbl
+        """
+
+        sql "drop table if exists `demo`.`records_tbl`"
+        sql """
+            CREATE TABLE `demo`.`records_tbl` (
+                `user_id` int(11) NOT NULL COMMENT "",
+                `name` varchar(26) NOT NULL COMMENT "",
+                `address` varchar(41) NOT NULL COMMENT "",
+                `city` varchar(11) NOT NULL COMMENT "",
+                `nation` varchar(16) NOT NULL COMMENT "",
+                `region` varchar(13) NOT NULL COMMENT "",
+                `phone` varchar(16) NOT NULL COMMENT "",
+                `mktsegment` varchar(11) NOT NULL COMMENT ""
+            ) DUPLICATE KEY (`user_id`, `name`)
+            DISTRIBUTED BY HASH(`user_id`) BUCKETS 10
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+        sql "select * from records_tbl order by user_id, name limit 100"
+        sql "select * from records_tbl order by user_id, name limit 100 offset 100"
+
+        sql "drop table if exists `demo`.`records_tbl2`"
+        sql """
+            CREATE TABLE `demo`.`records_tbl2` (
+                `user_id` int(11) NOT NULL COMMENT "",
+                `name` varchar(26) NOT NULL COMMENT "",
+                `address` varchar(41) NOT NULL COMMENT "",
+                `city` varchar(11) NOT NULL COMMENT "",
+                `nation` varchar(16) NOT NULL COMMENT "",
+                `region` varchar(13) NOT NULL COMMENT "",
+                `phone` varchar(16) NOT NULL COMMENT "",
+                `mktsegment` varchar(11) NOT NULL COMMENT "",
+                `unique_value` BIGINT NOT NULL AUTO_INCREMENT
+            ) DUPLICATE KEY (`user_id`, `name`)
+            DISTRIBUTED BY HASH(`user_id`) BUCKETS 10
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+        sql "select * from records_tbl2 order by unique_value limit 100"
+        sql "select * from records_tbl2 where unique_value > 99 order by unique_value limit 100"
+        sql """
+            select user_id, name, address, city, nation, region, phone, mktsegment
+            from records_tbl2, (select unique_value as max_value from records_tbl2 order by unique_value limit 1 offset 9999) as previous_data
+            where records_tbl2.unique_value > previous_data.max_value
+            order by unique_value limit 100
+        """
+    } catch (Throwable t) {
+        Assertions.fail("examples in docs/table-design/auto-increment.md failed to exec, please fix it", t)
+    }
+}
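The dictionary pattern exercised above (an AUTO_INCREMENT column as a compact surrogate key for bitmap UV counting) can be sanity-checked with a sketch like the following, assuming the tables created in the suite; the 1:1 assertion is illustrative:

    // sketch: every user_id in dictionary_tbl gets exactly one auto-generated
    // aid, so the distinct counts of the two columns should match
    def rows = sql "select count(distinct aid), count(distinct user_id) from dictionary_tbl"
    assert rows[0][0] == rows[0][1]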
diff --git a/regression-test/suites/doc/table-design/best-practice.md.groovy b/regression-test/suites/doc/table-design/best-practice.md.groovy
new file mode 100644
index 00000000000..44ce1c5a5ae
--- /dev/null
+++ b/regression-test/suites/doc/table-design/best-practice.md.groovy
@@ -0,0 +1,193 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.jupiter.api.Assertions;
+
+suite("docs/table-design/best-practice.md") {
+    try {
+        sql "drop table if exists session_data"
+        sql """
+            -- e.g. log analysis that allows duplicate KEYs and only appends new data
+            CREATE TABLE session_data
+            (
+                visitorid   SMALLINT,
+                sessionid   BIGINT,
+                visittime   DATETIME,
+                city        CHAR(20),
+                province    CHAR(20),
+                ip          varchar(32),
+                brower      CHAR(20),
+                url         VARCHAR(1024)
+            )
+            DUPLICATE KEY(visitorid, sessionid) -- only specifies the sort columns; rows with the same KEY are not merged
+            DISTRIBUTED BY HASH(sessionid, visitorid) BUCKETS 10
+            PROPERTIES ("replication_num" = "1")
+        """
+
+        sql "drop table if exists site_visit"
+        sql """
+            -- e.g. website traffic analysis
+            CREATE TABLE site_visit
+            (
+                siteid      INT,
+                city        SMALLINT,
+                username    VARCHAR(32),
+                pv BIGINT SUM DEFAULT '0' -- page view (PV) counting
+            )
+            AGGREGATE KEY(siteid, city, username) -- rows with the same KEY are merged; non-KEY columns are aggregated with the specified aggregate function
+            DISTRIBUTED BY HASH(siteid) BUCKETS 10
+            PROPERTIES ("replication_num" = "1")
+        """
+
+        sql "drop table if exists sales_order"
+        sql """
+            -- e.g. order deduplication analysis
+            CREATE TABLE sales_order
+            (
+                orderid     BIGINT,
+                status      TINYINT,
+                username    VARCHAR(32),
+                amount      BIGINT DEFAULT '0'
+            )
+            UNIQUE KEY(orderid) -- rows with the same KEY are merged
+            DISTRIBUTED BY HASH(orderid) BUCKETS 10
+            PROPERTIES ("replication_num" = "1")
+        """
+
+        sql "drop table if exists sale_detail_bloom"
+        sql """
+            -- Creation example: add "bloom_filter_columns"="k1,k2,k3" to the PROPERTIES of the CREATE TABLE statement
+            -- e.g. below we create BloomFilter indexes on the saler_id and category_id columns.
+            CREATE TABLE IF NOT EXISTS sale_detail_bloom (
+                sale_date date NOT NULL COMMENT "sale date",
+                customer_id int NOT NULL COMMENT "customer id",
+                saler_id int NOT NULL COMMENT "salesperson",
+                sku_id int NOT NULL COMMENT "product id",
+                category_id int NOT NULL COMMENT "product category",
+                sale_count int NOT NULL COMMENT "sales quantity",
+                sale_price DECIMAL(12,2) NOT NULL COMMENT "unit price",
+                sale_amt DECIMAL(20,2) COMMENT "total sales amount"
+            )
+            Duplicate KEY(sale_date, customer_id,saler_id,sku_id,category_id)
+            DISTRIBUTED BY HASH(saler_id) BUCKETS 10
+            PROPERTIES (
+                "bloom_filter_columns"="saler_id,category_id",
+                "replication_num" = "1"
+            )
+        """
+
+        sql "drop table if exists nb_table"
+        sql """
+            -- Creation example: specified at table creation time
+            CREATE TABLE `nb_table` (
+                `siteid` int(11) NULL DEFAULT "10" COMMENT "",
+                `citycode` smallint(6) NULL COMMENT "",
+                `username` varchar(32) NULL DEFAULT "" COMMENT "",
+                INDEX idx_ngrambf (`username`) USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="256") COMMENT 'username ngram_bf index'
+            ) ENGINE=OLAP
+            AGGREGATE KEY(`siteid`, `citycode`, `username`) COMMENT "OLAP"
+            DISTRIBUTED BY HASH(`siteid`) BUCKETS 10
+            PROPERTIES (
+                "replication_num" = "1"
+            )
+            -- PROPERTIES("gram_size"="3", "bf_size"="256") set the gram length and the bloom filter size in bytes, respectively.
+            -- gram_size depends on the actual query patterns and is usually set to the length of most query strings; bf_size can be determined by testing: larger usually filters better, so start from 256 and verify the effect. Larger values also increase index storage and memory cost.
+            -- If the data cardinality is high, bf_size does not need to be large; if the cardinality is not high, increasing it improves the filtering effect.
+        """
+
+        multi_sql """
+            drop table if exists tbl_unique_merge_on_write;
+            drop table if exists tbl_unique_merge_on_write_p;
+        """
+        multi_sql """
+            -- Take a Merge-on-Write table of the Unique model as an example
+            -- The Merge-on-Write implementation of the Unique model is a completely different model from the Aggregate model; its query performance is closer to the Duplicate model,
+            -- and in scenarios that need a primary-key constraint it has a clear query-performance advantage over the Aggregate model, especially for aggregation queries and queries that use indexes to filter out large amounts of data.
+
+            -- Non-partitioned table
+            CREATE TABLE IF NOT EXISTS tbl_unique_merge_on_write
+            (
+                `user_id` LARGEINT NOT NULL COMMENT "user id",
+                `username` VARCHAR(50) NOT NULL COMMENT "user nickname",
+                `register_time` DATE COMMENT "user registration time",
+                `city` VARCHAR(20) COMMENT "user city",
+                `age` SMALLINT COMMENT "user age",
+                `sex` TINYINT COMMENT "user gender",
+                `phone` LARGEINT COMMENT "user phone",
+                `address` VARCHAR(500) COMMENT "user address"
+            )
+            UNIQUE KEY(`user_id`, `username`)
+            -- 3-5 GB of data
+            DISTRIBUTED BY HASH(`user_id`) BUCKETS 10
+            PROPERTIES (
+                -- In version 1.2.0, as a new feature, Merge-on-Write is disabled by default; users can enable it by adding the property below
+                "enable_unique_key_merge_on_write" = "true" ,
+                "replication_num" = "1"
+            );
+
+            -- Partitioned table
+            CREATE TABLE IF NOT EXISTS tbl_unique_merge_on_write_p
+            (
+                `user_id` LARGEINT NOT NULL COMMENT "user id",
+                `username` VARCHAR(50) NOT NULL COMMENT "user nickname",
+                `register_time` DATE COMMENT "user registration time",
+                `city` VARCHAR(20) COMMENT "user city",
+                `age` SMALLINT COMMENT "user age",
+                `sex` TINYINT COMMENT "user gender",
+                `phone` LARGEINT COMMENT "user phone",
+                `address` VARCHAR(500) COMMENT "user address"
+            )
+            UNIQUE KEY(`user_id`, `username`, `register_time`)
+            PARTITION BY RANGE(`register_time`) (
+                PARTITION p00010101_1899 VALUES [('0001-01-01'), ('1900-01-01')),
+                PARTITION p19000101 VALUES [('1900-01-01'), ('1900-01-02')),
+                PARTITION p19000102 VALUES [('1900-01-02'), ('1900-01-03')),
+                PARTITION p19000103 VALUES [('1900-01-03'), ('1900-01-04')),
+                PARTITION p19000104_1999 VALUES [('1900-01-04'), ('2000-01-01')),
+                FROM ("2000-01-01") TO ("2022-01-01") INTERVAL 1 YEAR,
+                PARTITION p30001231 VALUES [('3000-12-31'), ('3001-01-01')),
+                PARTITION p99991231 VALUES [('9999-12-31'), (MAXVALUE))
+            )
+            -- 3-5 GB of data by default
+            DISTRIBUTED BY HASH(`user_id`) BUCKETS 10
+            PROPERTIES (
+                -- In version 1.2.0, as a new feature, Merge-on-Write is disabled by default; users can enable it by adding the property below
+                "enable_unique_key_merge_on_write" = "true",
+                -- Unit of dynamic partition scheduling. HOUR, DAY, WEEK, MONTH and YEAR are supported, meaning partitions are created or dropped by hour, day, week, month or year.
+                "dynamic_partition.time_unit" = "MONTH",
+                -- Starting offset of dynamic partitioning, a negative number. Depending on time_unit and relative to today (this week/month), partitions whose range falls before this offset are dropped (TTL). If unset, it defaults to -2147483648, i.e. historical partitions are never dropped.
+                "dynamic_partition.start" = "-3000",
+                -- Ending offset of dynamic partitioning, a positive number. Depending on time_unit and relative to today (this week/month), partitions in the corresponding range are created in advance.
+                "dynamic_partition.end" = "10",
+                -- Prefix of dynamically created partition names (required).
+                "dynamic_partition.prefix" = "p",
+                -- Number of buckets for dynamically created partitions.
+                "dynamic_partition.buckets" = "10",
+                "dynamic_partition.enable" = "true",
+                -- Replica count of dynamically created partitions; if unset, it defaults to the replica count specified at table creation, 3.
+                "dynamic_partition.replication_num" = "1",
+                "replication_num" = "1"
+            );
+
+            -- Check created partitions
+            -- The number of partitions actually created is jointly determined by dynamic_partition.start, dynamic_partition.end and the PARTITION BY RANGE settings
+            show partitions from tbl_unique_merge_on_write_p;
+        """
+    } catch (Throwable t) {
+        Assertions.fail("examples in docs/table-design/best-practice.md failed to exec, please fix it", t)
+    }
+}
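A rough consistency check for the dynamic-partition settings above, as a sketch (the bound is illustrative; as the last comment in the DDL notes, the real partition count also depends on the static PARTITION BY RANGE clauses):

    // sketch: with time_unit=MONTH, start=-3000 and end=10, the scheduler
    // keeps at most ~3000 historical months plus 10 pre-created future months
    def parts = sql "show partitions from tbl_unique_merge_on_write_p"
    assert parts.size() <= 3000 + 10 + 1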
diff --git a/regression-test/suites/doc/table-design/data-model/aggregate.md.groovy b/regression-test/suites/doc/table-design/data-model/aggregate.md.groovy
new file mode 100644
index 00000000000..cc6cc576e97
--- /dev/null
+++ b/regression-test/suites/doc/table-design/data-model/aggregate.md.groovy
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.jupiter.api.Assertions;
+
+suite("docs/table-design/data-model/aggregate.md") {
+    try {
+        multi_sql """
+            CREATE TABLE IF NOT EXISTS example_tbl_agg1
+            (
+                `user_id` LARGEINT NOT NULL COMMENT "user id",
+                `date` DATE NOT NULL COMMENT "date of data load",
+                `city` VARCHAR(20) COMMENT "user city",
+                `age` SMALLINT COMMENT "user age",
+                `sex` TINYINT COMMENT "user gender",
+                `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "last visit time of the user",
+                `cost` BIGINT SUM DEFAULT "0" COMMENT "total consumption of the user",
+                `max_dwell_time` INT MAX DEFAULT "0" COMMENT "maximum dwell time of the user",
+                `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "minimum dwell time of the user"
+            )
+            AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`)
+            DISTRIBUTED BY HASH(`user_id`) BUCKETS 1
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            );
+        """
+
+        multi_sql """
+            insert into example_tbl_agg1 values
+            (10000,"2017-10-01","北京",20,0,"2017-10-01 06:00:00",20,10,10),
+            (10000,"2017-10-01","北京",20,0,"2017-10-01 07:00:00",15,2,2),
+            (10001,"2017-10-01","北京",30,1,"2017-10-01 17:05:45",2,22,22),
+            (10002,"2017-10-02","上海",20,1,"2017-10-02 12:59:12",200,5,5),
+            (10003,"2017-10-02","广州",32,0,"2017-10-02 11:20:00",30,11,11),
+            (10004,"2017-10-01","深圳",35,0,"2017-10-01 10:00:15",100,3,3),
+            (10004,"2017-10-03","深圳",35,0,"2017-10-03 10:20:22",11,6,6);
+        """
+
+        multi_sql """
+            insert into example_tbl_agg1 values
+            (10004,"2017-10-03","深圳",35,0,"2017-10-03 11:22:00",44,19,19),
+            (10005,"2017-10-03","长沙",29,1,"2017-10-03 18:11:02",3,1,1);
+        """
+
+        sql "drop table if exists aggstate"
+        multi_sql """
+            set enable_agg_state=true;
+            create table aggstate(
+                k1 int null,
+                k2 agg_state<sum(int)> generic,
+                k3 agg_state<group_concat(string)> generic
+            )
+            aggregate key (k1)
+            distributed BY hash(k1) buckets 3
+            properties("replication_num" = "1");
+        """
+
+        multi_sql """
+            insert into aggstate values(1,sum_state(1),group_concat_state('a'));
+            insert into aggstate values(1,sum_state(2),group_concat_state('b'));
+            insert into aggstate values(1,sum_state(3),group_concat_state('c'));
+        """
+
+        multi_sql "insert into aggstate values(2,sum_state(4),group_concat_state('d'));"
+        multi_sql "select sum_merge(k2) from aggstate;"
+        multi_sql "select group_concat_merge(k3) from aggstate;"
+        multi_sql "insert into aggstate select 3,sum_union(k2),group_concat_union(k3) from aggstate;"
+        multi_sql """
+            select sum_merge(k2), group_concat_merge(k3) from aggstate;
+            select sum_merge(k2), group_concat_merge(k3) from aggstate where k1 != 2;
+        """
+    } catch (Throwable t) {
+        Assertions.fail("examples in docs/table-design/data-model/aggregate.md failed to exec, please fix it", t)
+    }
+}
diff --git a/regression-test/suites/doc/table-design/data-model/duplicate.md.groovy b/regression-test/suites/doc/table-design/data-model/duplicate.md.groovy
new file mode 100644
index 00000000000..e8360f480f2
--- /dev/null
+++ b/regression-test/suites/doc/table-design/data-model/duplicate.md.groovy
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.jupiter.api.Assertions;
+
+suite("docs/table-design/data-model/duplicate.md") {
+    try {
+        multi_sql """
+            CREATE TABLE IF NOT EXISTS example_tbl_by_default
+            (
+                `timestamp` DATETIME NOT NULL COMMENT "log time",
+                `type` INT NOT NULL COMMENT "log type",
+                `error_code` INT COMMENT "error code",
+                `error_msg` VARCHAR(1024) COMMENT "error details",
+                `op_id` BIGINT COMMENT "owner id",
+                `op_time` DATETIME COMMENT "handling time"
+            )
+            DISTRIBUTED BY HASH(`type`) BUCKETS 1
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            );
+            desc example_tbl_by_default;
+        """
+
+        multi_sql """
+            CREATE TABLE IF NOT EXISTS example_tbl_duplicate_without_keys_by_default
+            (
+                `timestamp` DATETIME NOT NULL COMMENT "log time",
+                `type` INT NOT NULL COMMENT "log type",
+                `error_code` INT COMMENT "error code",
+                `error_msg` VARCHAR(1024) COMMENT "error details",
+                `op_id` BIGINT COMMENT "owner id",
+                `op_time` DATETIME COMMENT "handling time"
+            )
+            DISTRIBUTED BY HASH(`type`) BUCKETS 1
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1",
+                "enable_duplicate_without_keys_by_default" = "true"
+            );
+            desc example_tbl_duplicate_without_keys_by_default;
+        """
+
+        multi_sql """
+            CREATE TABLE IF NOT EXISTS example_tbl_duplicate
+            (
+                `timestamp` DATETIME NOT NULL COMMENT "log time",
+                `type` INT NOT NULL COMMENT "log type",
+                `error_code` INT COMMENT "error code",
+                `error_msg` VARCHAR(1024) COMMENT "error details",
+                `op_id` BIGINT COMMENT "owner id",
+                `op_time` DATETIME COMMENT "handling time"
+            )
+            DUPLICATE KEY(`timestamp`, `type`, `error_code`)
+            DISTRIBUTED BY HASH(`type`) BUCKETS 1
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            );
+            desc example_tbl_duplicate;
+        """
+    } catch (Throwable t) {
+        Assertions.fail("examples in docs/table-design/data-model/duplicate.md failed to exec, please fix it", t)
+    }
+}
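The agg_state examples in data-model/aggregate.md.groovy above hinge on the merge/union distinction; a compact sketch of the same idea (table and values are the ones inserted in that suite):

    // sketch: sum_merge finalizes agg_state values; the three k1=1 rows
    // were built with sum_state(1), sum_state(2) and sum_state(3), so 6
    def r = sql "select sum_merge(k2) from aggstate where k1 = 1"
    assert r[0][0] == 6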
diff --git a/regression-test/suites/doc/table-design/data-model/unique.md.groovy b/regression-test/suites/doc/table-design/data-model/unique.md.groovy
new file mode 100644
index 00000000000..7c5e7783b06
--- /dev/null
+++ b/regression-test/suites/doc/table-design/data-model/unique.md.groovy
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.jupiter.api.Assertions;
+
+suite("docs/table-design/data-model/unique.md") {
+    try {
+        multi_sql """
+            CREATE TABLE IF NOT EXISTS example_tbl_unique
+            (
+                `user_id` LARGEINT NOT NULL COMMENT "user id",
+                `username` VARCHAR(50) NOT NULL COMMENT "user nickname",
+                `city` VARCHAR(20) COMMENT "user city",
+                `age` SMALLINT COMMENT "user age",
+                `sex` TINYINT COMMENT "user gender",
+                `phone` LARGEINT COMMENT "user phone",
+                `address` VARCHAR(500) COMMENT "user address",
+                `register_time` DATETIME COMMENT "user registration time"
+            )
+            UNIQUE KEY(`user_id`, `username`)
+            DISTRIBUTED BY HASH(`user_id`) BUCKETS 1
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            );
+        """
+
+        multi_sql """
+            CREATE TABLE IF NOT EXISTS example_tbl_unique_merge_on_write
+            (
+                `user_id` LARGEINT NOT NULL COMMENT "user id",
+                `username` VARCHAR(50) NOT NULL COMMENT "user nickname",
+                `city` VARCHAR(20) COMMENT "user city",
+                `age` SMALLINT COMMENT "user age",
+                `sex` TINYINT COMMENT "user gender",
+                `phone` LARGEINT COMMENT "user phone",
+                `address` VARCHAR(500) COMMENT "user address",
+                `register_time` DATETIME COMMENT "user registration time"
+            )
+            UNIQUE KEY(`user_id`, `username`)
+            DISTRIBUTED BY HASH(`user_id`) BUCKETS 1
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1",
+                "enable_unique_key_merge_on_write" = "true"
+            );
+        """
+    } catch (Throwable t) {
+        Assertions.fail("examples in docs/table-design/data-model/unique.md failed to exec, please fix it", t)
+    }
+}
diff --git a/regression-test/suites/doc/table-design/data-partition.md.groovy b/regression-test/suites/doc/table-design/data-partition.md.groovy
new file mode 100644
index 00000000000..ac81c6d8dbe
--- /dev/null
+++ b/regression-test/suites/doc/table-design/data-partition.md.groovy
@@ -0,0 +1,311 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.jupiter.api.Assertions;
+
+suite("docs/table-design/data-partition.md") {
+    try {
+        sql "drop table if exists example_range_tbl"
+        multi_sql """
+            -- Range Partition
+            CREATE TABLE IF NOT EXISTS example_range_tbl
+            (
+                `user_id` LARGEINT NOT NULL COMMENT "user id",
+                `date` DATE NOT NULL COMMENT "date of data load",
+                `timestamp` DATETIME NOT NULL COMMENT "timestamp of data load",
+                `city` VARCHAR(20) COMMENT "user city",
+                `age` SMALLINT COMMENT "user age",
+                `sex` TINYINT COMMENT "user gender",
+                `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "last visit time of the user",
+                `cost` BIGINT SUM DEFAULT "0" COMMENT "total consumption of the user",
+                `max_dwell_time` INT MAX DEFAULT "0" COMMENT "maximum dwell time of the user",
+                `min_dwell_time` INT MIN DEFAULT "99999" COMMENT "minimum dwell time of the user"
+            )
+            ENGINE=OLAP
+            AGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`)
+            PARTITION BY RANGE(`date`)
+            (
+                PARTITION `p201701` VALUES LESS THAN ("2017-02-01"),
+                PARTITION `p201702` VALUES LESS THAN ("2017-03-01"),
+                PARTITION `p201703` VALUES LESS THAN ("2017-04-01"),
+                PARTITION `p2018` VALUES [("2018-01-01"), ("2019-01-01"))
+            )
+            DISTRIBUTED BY HASH(`user_id`) BUCKETS 16
+            PROPERTIES
+            (
+                "replication_num" = "1"
+            );
+        """
+
+        sql "show create table example_range_tbl"
+        sql "show partitions from example_range_tbl"
+        sql """ALTER TABLE example_range_tbl ADD PARTITION p201704 VALUES LESS THAN("2020-05-01") DISTRIBUTED BY HASH(`user_id`) BUCKETS 5"""
+
+        sql "drop table if exists null_list"
+        multi_sql """
+            create table null_list(
+                k0 varchar null
+            )
+            partition by list (k0)
+            (
+                PARTITION pX values in ((NULL))
+            )
+            DISTRIBUTED BY HASH(`k0`) BUCKETS 1
+            properties("replication_num" = "1");
+            insert into null_list values (null);
+            select * from null_list;
+        """
+
+        sql "drop table if exists null_range"
+        multi_sql """
+            create table null_range(
+                k0 int null
+            )
+            partition by range (k0)
+            (
+                PARTITION p10 values less than (10),
+                PARTITION p100 values less than (100),
+                PARTITION pMAX values less than (maxvalue)
+            )
+            DISTRIBUTED BY HASH(`k0`) BUCKETS 1
+            properties("replication_num" = "1");
+            insert into null_range values (null);
+            select * from null_range partition(p10);
+        """
+
+        sql "drop table if exists null_range2"
+        sql """
+            create table null_range2(
+                k0 int null
+            )
+            partition by range (k0)
+            (
+                PARTITION p200 values [("100"), ("200"))
+            )
+            DISTRIBUTED BY HASH(`k0`) BUCKETS 1
+            properties("replication_num" = "1")
+        """
+        try {
+            sql " insert into null_range2 values (null) "
+            Assertions.fail("The SQL above should throw an exception as follows:\n\t\terrCode = 2, detailMessage = Insert has filtered data in strict mode. url: http://127.0.0.1:8040/api/_load_error_log?file=__shard_0/error_log_insert_stmt_b3a6d1f1fac74750-b3bb5d6e92a66da4_b3a6d1f1fac74750_b3bb5d6e92a66da4")
+        } catch (Exception e) {
+            assertTrue(e.getMessage().contains("errCode = 2, detailMessage = Insert has filtered data in strict mode. url:"))
+        }
+
+        sql "drop table if exists tbl1"
+        sql """
+            CREATE TABLE tbl1
+            (
+                k1 DATE
+            )
+            PARTITION BY RANGE(k1) ()
+            DISTRIBUTED BY HASH(k1)
+            PROPERTIES
+            (
+                "dynamic_partition.enable" = "true",
+                "dynamic_partition.time_unit" = "DAY",
+                "dynamic_partition.start" = "-7",
+                "dynamic_partition.end" = "3",
+                "dynamic_partition.prefix" = "p",
+                "dynamic_partition.buckets" = "32",
+                "replication_num" = "1"
+            )
+        """
+
+        sql "drop table if exists tbl1"
+        sql """
+            CREATE TABLE tbl1
+            (
+                k1 DATETIME,
+            )
+            PARTITION BY RANGE(k1) ()
+            DISTRIBUTED BY HASH(k1)
+            PROPERTIES
+            (
+                "dynamic_partition.enable" = "true",
+                "dynamic_partition.time_unit" = "WEEK",
+                "dynamic_partition.start" = "-2",
+                "dynamic_partition.end" = "2",
+                "dynamic_partition.prefix" = "p",
+                "dynamic_partition.buckets" = "8",
+                "replication_num" = "1"
+            )
+        """
+
+        sql "drop table if exists tbl1"
+        sql """
+            CREATE TABLE tbl1
+            (
+                k1 DATE
+            )
+            PARTITION BY RANGE(k1) ()
+            DISTRIBUTED BY HASH(k1)
+            PROPERTIES
+            (
+                "dynamic_partition.enable" = "true",
+                "dynamic_partition.time_unit" = "MONTH",
+                "dynamic_partition.end" = "2",
+                "dynamic_partition.prefix" = "p",
+                "dynamic_partition.buckets" = "8",
+                "dynamic_partition.start_day_of_month" = "3",
+                "replication_num" = "1"
+            )
+        """
+
+        sql "SHOW DYNAMIC PARTITION TABLES"
+        sql """ ADMIN SET FRONTEND CONFIG ("dynamic_partition_enable" = "true") """
+        cmd """ curl --location-trusted -u ${context.config.jdbcUser}:${context.config.jdbcPassword} -XGET http://${context.config.feHttpAddress}/api/_set_config?dynamic_partition_enable=true """
+
+        sql """ ADMIN SET FRONTEND CONFIG ("dynamic_partition_check_interval_seconds" = "7200") """
+        cmd """ curl --location-trusted -u ${context.config.jdbcUser}:${context.config.jdbcPassword} -XGET http://${context.config.feHttpAddress}/api/_set_config?dynamic_partition_check_interval_seconds=432000 """
+
+        sql "drop table if exists `DAILY_TRADE_VALUE`"
+        sql """
+            CREATE TABLE `DAILY_TRADE_VALUE`
+            (
+                `TRADE_DATE` datev2 NOT NULL COMMENT 'trade date',
+                `TRADE_ID` varchar(40) NOT NULL COMMENT 'trade id',
+            )
+            UNIQUE KEY(`TRADE_DATE`, `TRADE_ID`)
+            PARTITION BY RANGE(`TRADE_DATE`)
+            (
+                PARTITION p_2000 VALUES [('2000-01-01'), ('2001-01-01')),
+                PARTITION p_2001 VALUES [('2001-01-01'), ('2002-01-01')),
+                PARTITION p_2002 VALUES [('2002-01-01'), ('2003-01-01')),
+                PARTITION p_2003 VALUES [('2003-01-01'), ('2004-01-01')),
+                PARTITION p_2004 VALUES [('2004-01-01'), ('2005-01-01')),
+                PARTITION p_2005 VALUES [('2005-01-01'), ('2006-01-01')),
+                PARTITION p_2006 VALUES [('2006-01-01'), ('2007-01-01')),
+                PARTITION p_2007 VALUES [('2007-01-01'), ('2008-01-01')),
+                PARTITION p_2008 VALUES [('2008-01-01'), ('2009-01-01')),
+                PARTITION p_2009 VALUES [('2009-01-01'), ('2010-01-01')),
+                PARTITION p_2010 VALUES [('2010-01-01'), ('2011-01-01')),
+                PARTITION p_2011 VALUES [('2011-01-01'), ('2012-01-01')),
+                PARTITION p_2012 VALUES [('2012-01-01'), ('2013-01-01')),
+                PARTITION p_2013 VALUES [('2013-01-01'), ('2014-01-01')),
+                PARTITION p_2014 VALUES [('2014-01-01'), ('2015-01-01')),
+                PARTITION p_2015 VALUES [('2015-01-01'), ('2016-01-01')),
+                PARTITION p_2016 VALUES [('2016-01-01'), ('2017-01-01')),
+                PARTITION p_2017 VALUES [('2017-01-01'), ('2018-01-01')),
+                PARTITION p_2018 VALUES [('2018-01-01'), ('2019-01-01')),
+                PARTITION p_2019 VALUES [('2019-01-01'), ('2020-01-01')),
+                PARTITION p_2020 VALUES [('2020-01-01'), ('2021-01-01')),
+                PARTITION p_2021 VALUES [('2021-01-01'), ('2022-01-01'))
+            )
+            DISTRIBUTED BY HASH(`TRADE_DATE`) BUCKETS 10
+            PROPERTIES (
+                "replication_num" = "1"
+            )
+        """
+
+        sql "drop table if exists `date_table`"
+        sql """
+            CREATE TABLE `date_table` (
+                `TIME_STAMP` datev2 NOT NULL COMMENT 'collection date'
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`TIME_STAMP`)
+            AUTO PARTITION BY RANGE (date_trunc(`TIME_STAMP`, 'month'))
+            (
+            )
+            DISTRIBUTED BY HASH(`TIME_STAMP`) BUCKETS 10
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+
+        sql "drop table if exists `str_table`"
+        sql """
+            CREATE TABLE `str_table` (
+                `str` varchar not null
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`str`)
+            AUTO PARTITION BY LIST (`str`)
+            (
+            )
+            DISTRIBUTED BY HASH(`str`) BUCKETS 10
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1"
+            )
+        """
+
+        sql "drop table if exists auto_null_list"
+        multi_sql """
+            create table auto_null_list(
+                k0 varchar null
+            )
+            auto partition by list (k0)
+            (
+            )
+            DISTRIBUTED BY HASH(`k0`) BUCKETS 1
+            properties("replication_num" = "1");
+
+            insert into auto_null_list values (null);
+            select * from auto_null_list;
+            select * from auto_null_list partition(pX);
+        """
+
+        try {
+            sql "drop table if exists `range_table_nullable`"
+            sql """
+                CREATE TABLE `range_table_nullable` (
+                    `k1` INT,
+                    `k2` DATETIMEV2(3),
+                    `k3` DATETIMEV2(6)
+                ) ENGINE=OLAP
+                DUPLICATE KEY(`k1`)
+                AUTO PARTITION BY RANGE (date_trunc(`k2`, 'day'))
+                (
+                )
+                DISTRIBUTED BY HASH(`k1`) BUCKETS 16
+                PROPERTIES (
+                    "replication_allocation" = "tag.location.default: 1"
+                )
+            """
+            Assertions.fail("The SQL above should throw an exception as follows:\n\t\terrCode = 2, detailMessage = AUTO RANGE PARTITION doesn't support NULL column")
+        } catch (Exception e) {
+            assertTrue(e.getMessage().contains("errCode = 2, detailMessage = AUTO RANGE PARTITION doesn't support NULL column"))
+        }
+
+        sql "drop table if exists `DAILY_TRADE_VALUE`"
+        sql """
+            CREATE TABLE `DAILY_TRADE_VALUE`
+            (
+                `TRADE_DATE` datev2 NOT NULL COMMENT 'trade date',
+                `TRADE_ID` varchar(40) NOT NULL COMMENT 'trade id',
+            )
+            UNIQUE KEY(`TRADE_DATE`, `TRADE_ID`)
+            AUTO PARTITION BY RANGE (date_trunc(`TRADE_DATE`, 'year'))
+            (
+            )
+            DISTRIBUTED BY HASH(`TRADE_DATE`) BUCKETS 10
+            PROPERTIES (
+                "replication_num" = "1"
+            )
+        """
+        def res1 = sql "show partitions from `DAILY_TRADE_VALUE`"
+        assertTrue(res1.isEmpty())
+
+        def res2 = multi_sql """
+            insert into `DAILY_TRADE_VALUE` values ('2012-12-13', 1), ('2008-02-03', 2), ('2014-11-11', 3);
+            show partitions from `DAILY_TRADE_VALUE`;
+        """
+        assertTrue(res2[1].size() == 3)
+
+    } catch (Throwable t) {
+        Assertions.fail("examples in docs/table-design/data-partition.md failed to exec, please fix it", t)
+    }
+}
diff --git a/regression-test/suites/doc/table-design/index/bloomfilter.md.groovy b/regression-test/suites/doc/table-design/index/bloomfilter.md.groovy
new file mode 100644
index 00000000000..7e0b44359d9
--- /dev/null
+++ b/regression-test/suites/doc/table-design/index/bloomfilter.md.groovy
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.jupiter.api.Assertions;
+
+suite("docs/table-design/index/bloomfilter.md") {
+    try {
+        multi_sql """
+            CREATE TABLE IF NOT EXISTS sale_detail_bloom (
+                sale_date date NOT NULL COMMENT "Sale date",
+                customer_id int NOT NULL COMMENT "Customer ID",
+                saler_id int NOT NULL COMMENT "Salesperson",
+                sku_id int NOT NULL COMMENT "Product ID",
+                category_id int NOT NULL COMMENT "Product category",
+                sale_count int NOT NULL COMMENT "Sales quantity",
+                sale_price DECIMAL(12,2) NOT NULL COMMENT "Unit price",
+                sale_amt DECIMAL(20,2) COMMENT "Total sales amount"
+            )
+            DUPLICATE KEY(sale_date, customer_id, saler_id, sku_id, category_id)
+            DISTRIBUTED BY HASH(saler_id) BUCKETS 10
+            PROPERTIES (
+                "replication_num" = "1",
+                "bloom_filter_columns"="saler_id,category_id"
+            );
+        """
+    } catch (Throwable t) {
+        Assertions.fail("examples in docs/table-design/index/bloomfilter.md failed to exec, please fix it", t)
+    }
+}
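A BloomFilter index helps equality filtering on the indexed columns; a sketch of the query shape the bloom_filter_columns above target (the literals are illustrative):

    // sketch: equality predicates on saler_id and category_id, the two
    // BF-indexed columns; pages whose bloom filter rules out the values
    // can be skipped at scan time
    sql "select * from sale_detail_bloom where saler_id = 1001 and category_id = 2"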
diff --git a/regression-test/suites/doc/table-design/index/inverted-index.md.groovy b/regression-test/suites/doc/table-design/index/inverted-index.md.groovy
new file mode 100644
index 00000000000..0359245afdc
--- /dev/null
+++ b/regression-test/suites/doc/table-design/index/inverted-index.md.groovy
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.jupiter.api.Assertions;
+
+suite("docs/table-design/index/inverted-index.md.groovy") {
+    def waitUntilSchemaChangeDone = { tbl ->
+        waitForSchemaChangeDone({
+            sql " SHOW ALTER TABLE COLUMN FROM test_inverted_index WHERE TableName='${tbl}' ORDER BY createtime DESC LIMIT 1 "
+        })
+    }
+    try {
+        sql """ SELECT TOKENIZE('武汉长江大桥','"parser"="chinese","parser_mode"="fine_grained"') """
+        sql """ SELECT TOKENIZE('武汉市长江大桥','"parser"="chinese","parser_mode"="fine_grained"') """
+        sql """ SELECT TOKENIZE('武汉市长江大桥','"parser"="chinese","parser_mode"="coarse_grained"') """
+        sql """ SELECT TOKENIZE('I love CHINA','"parser"="english"') """
+        sql """ SELECT TOKENIZE('I love CHINA 我爱我的祖国','"parser"="unicode"') """
+
+        sql "DROP DATABASE IF EXISTS test_inverted_index;"
+        multi_sql """
+            CREATE DATABASE test_inverted_index;
+
+            USE test_inverted_index;
+
+            -- the inverted index idx_comment on the comment column is created together with the table
+            -- USING INVERTED specifies that the index type is an inverted index
+            -- PROPERTIES("parser" = "english") selects the "english" tokenizer; "chinese" (Chinese) and "unicode" (mixed Chinese/English multilingual) tokenization are also supported, and omitting the "parser" property means no tokenization
+
+            CREATE TABLE hackernews_1m
+            (
+                `id` BIGINT,
+                `deleted` TINYINT,
+                `type` String,
+                `author` String,
+                `timestamp` DateTimeV2,
+                `comment` String,
+                `dead` TINYINT,
+                `parent` BIGINT,
+                `poll` BIGINT,
+                `children` Array<BIGINT>,
+                `url` String,
+                `score` INT,
+                `title` String,
+                `parts` Array<INT>,
+                `descendants` INT,
+                INDEX idx_comment (`comment`) USING INVERTED PROPERTIES("parser" = "english") COMMENT 'inverted index for comment'
+            )
+            DUPLICATE KEY(`id`)
+            DISTRIBUTED BY HASH(`id`) BUCKETS 10
+            PROPERTIES ("replication_num" = "1");
+        """
+
+        sql """ SELECT count() FROM hackernews_1m """
+        sql """ SELECT count() FROM hackernews_1m WHERE comment LIKE '%OLAP%' """
+        sql """ SELECT count() FROM hackernews_1m WHERE comment MATCH_ANY 'OLAP' """
+        multi_sql """
+            SELECT count() FROM hackernews_1m WHERE comment LIKE '%OLTP%';
+            SELECT count() FROM hackernews_1m WHERE comment MATCH_ANY 'OLTP';
+        """
+        multi_sql """
+            SELECT count() FROM hackernews_1m WHERE comment LIKE '%OLAP%' AND comment LIKE '%OLTP%';
+            SELECT count() FROM hackernews_1m WHERE comment MATCH_ALL 'OLAP OLTP';
+        """
+        multi_sql """
+            SELECT count() FROM hackernews_1m WHERE comment LIKE '%OLAP%' OR comment LIKE '%OLTP%';
+            SELECT count() FROM hackernews_1m WHERE comment MATCH_ANY 'OLAP OLTP';
+        """
+        sql """ SELECT count() FROM hackernews_1m WHERE timestamp > '2007-08-23 04:17:00' """
+        sql """ CREATE INDEX idx_timestamp ON hackernews_1m(timestamp) USING INVERTED """
+        waitUntilSchemaChangeDone("hackernews_1m")
+        if (!isCloudMode()) {
+            sql """ BUILD INDEX idx_timestamp ON hackernews_1m """
+        }
+        sql """ SHOW ALTER TABLE COLUMN """
+        sql """ SHOW BUILD INDEX """
+        sql """ SELECT count() FROM hackernews_1m WHERE timestamp > '2007-08-23 04:17:00' """
+
+        multi_sql """
+            SELECT count() FROM hackernews_1m WHERE parent = 11189;
+            ALTER TABLE hackernews_1m ADD INDEX idx_parent(parent) USING INVERTED;
+        """
+
+        waitUntilSchemaChangeDone("hackernews_1m")
+        if (!isCloudMode()) {
+            sql "BUILD INDEX idx_parent ON hackernews_1m;"
+        }
+        multi_sql """
+            SHOW ALTER TABLE COLUMN;
+            SHOW BUILD INDEX;
+            SELECT count() FROM hackernews_1m WHERE parent = 11189;
+        """
+        multi_sql """
+            SELECT count() FROM hackernews_1m WHERE author = 'faster';
+            ALTER TABLE hackernews_1m ADD INDEX idx_author(author) USING INVERTED;
+        """
+        waitUntilSchemaChangeDone("hackernews_1m")
+        if (!isCloudMode()) {
+            sql "BUILD INDEX idx_author ON hackernews_1m"
+        }
+        multi_sql """
+            SHOW ALTER TABLE COLUMN;
+            SHOW BUILD INDEX order by CreateTime desc limit 1;
+            SELECT count() FROM hackernews_1m WHERE author = 'faster';
+        """
+    } catch (Throwable t) {
+        Assertions.fail("examples in docs/table-design/index/inverted-index.md failed to exec, please fix it", t)
+    }
+}
diff --git a/regression-test/suites/doc/table-design/index/ngram-bloomfilter-index.md.groovy b/regression-test/suites/doc/table-design/index/ngram-bloomfilter-index.md.groovy
new file mode 100644
index 00000000000..f42b455559b
--- /dev/null
+++ b/regression-test/suites/doc/table-design/index/ngram-bloomfilter-index.md.groovy
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.jupiter.api.Assertions
+
+suite("docs/table-design/index/ngram-bloomfilter-index.md") {
+    try {
+        sql "DROP TABLE IF EXISTS `amazon_reviews`"
+        sql """
+            CREATE TABLE `amazon_reviews` (
+                `review_date` int(11) NULL,
+                `marketplace` varchar(20) NULL,
+                `customer_id` bigint(20) NULL,
+                `review_id` varchar(40) NULL,
+                `product_id` varchar(10) NULL,
+                `product_parent` bigint(20) NULL,
+                `product_title` varchar(500) NULL,
+                `product_category` varchar(50) NULL,
+                `star_rating` smallint(6) NULL,
+                `helpful_votes` int(11) NULL,
+                `total_votes` int(11) NULL,
+                `vine` boolean NULL,
+                `verified_purchase` boolean NULL,
+                `review_headline` varchar(500) NULL,
+                `review_body` string NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`review_date`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`review_date`) BUCKETS 16
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1",
+                "compression" = "ZSTD"
+            )
+        """
+
+        var f = new File("amazon_reviews_2010.snappy.parquet")
+        if (f.exists()) {
+            // remove any stale copy before downloading a fresh one
+            f.delete()
+        }
+        cmd("wget ${getS3Url()}/regression/doc/amazon_reviews_2010.snappy.parquet")
+        cmd("""curl --location-trusted -u ${context.config.jdbcUser}:${context.config.jdbcPassword} -T amazon_reviews_2010.snappy.parquet -H "format:parquet" http://${context.config.feHttpAddress}/api/${curDbName}/amazon_reviews/_stream_load""")
+
+        sql " SELECT COUNT() FROM amazon_reviews "
+        sql """
+            SELECT
+                product_id,
+                any(product_title),
+                AVG(star_rating) AS rating,
+                COUNT() AS count
+            FROM
+                amazon_reviews
+            WHERE
+                review_body LIKE '%is super awesome%'
+            GROUP BY
+                product_id
+            ORDER BY
+                count DESC,
+                rating DESC,
+                product_id
+            LIMIT 5
+        """
+        sql """ ALTER TABLE amazon_reviews ADD INDEX review_body_ngram_idx(review_body) USING NGRAM_BF PROPERTIES("gram_size"="10", "bf_size"="10240") """
+    } catch (Throwable t) {
+        Assertions.fail("examples in docs/table-design/index/ngram-bloomfilter-index.md failed to exec, please fix it", t)
+    }
+}
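The NGRAM_BF index added above is meant for LIKE filtering, and it only applies when the search pattern is at least gram_size characters long; with gram_size=10, the doc's 16-character pattern qualifies. A sketch re-running that query once the index exists:

    // sketch: same LIKE predicate as before the ALTER; the pattern
    // 'is super awesome' exceeds gram_size (10), so the index can prune
    sql "SELECT COUNT() FROM amazon_reviews WHERE review_body LIKE '%is super awesome%'"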
diff --git a/regression-test/suites/doc/table-design/index/prefix-index.md.groovy b/regression-test/suites/doc/table-design/index/prefix-index.md.groovy
new file mode 100644
index 00000000000..b2740eb361c
--- /dev/null
+++ b/regression-test/suites/doc/table-design/index/prefix-index.md.groovy
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.jupiter.api.Assertions;
+
+suite("docs/table-design/index/prefix-index.md") {
+    try {
+        sql "DROP TABLE IF EXISTS table1"
+        sql "DROP TABLE IF EXISTS table2"
+
+        sql """
+            CREATE TABLE IF NOT EXISTS `table1` (
+                user_id BIGINT,
+                age INT,
+                message VARCHAR(100),
+                max_dwell_time BIGINT,
+                min_dwell_time DATETIME
+            ) PROPERTIES ("replication_num" = "1")
+        """
+
+        sql """
+            CREATE TABLE IF NOT EXISTS `table2` (
+                user_id VARCHAR(20),
+                age INT,
+                message VARCHAR(100),
+                max_dwell_time BIGINT,
+                min_dwell_time DATETIME
+            ) PROPERTIES ("replication_num" = "1")
+        """
+
+        sql "SELECT * FROM table1 WHERE user_id=1829239 and age=20"
+        sql "SELECT * FROM table1 WHERE age=20"
+    } catch (Throwable t) {
+        Assertions.fail("examples in docs/table-design/index/prefix-index.md failed to exec, please fix it", t)
+    }
+}
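The two tables above contrast prefix-index layouts: the prefix index covers roughly the first 36 bytes of the sort key, so table2's varchar user_id consumes the budget before age. A sketch of the query shape affected (the key literal is illustrative):

    // sketch: on table1 (user_id BIGINT, age INT) both predicates sit in
    // the prefix; on table2 only the varchar user_id does, so age cannot
    // be served by the prefix index
    sql "SELECT * FROM table2 WHERE user_id='12345678901' and age=20"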
diff --git a/regression-test/suites/doc/table-design/row-store.md.groovy b/regression-test/suites/doc/table-design/row-store.md.groovy
new file mode 100644
index 00000000000..6a8d89d6374
--- /dev/null
+++ b/regression-test/suites/doc/table-design/row-store.md.groovy
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.jupiter.api.Assertions;
+
+suite("docs/table-design/row-store.md") {
+    try {
+        sql "DROP TABLE IF EXISTS `tbl_point_query`"
+        multi_sql """
+            CREATE TABLE `tbl_point_query` (
+                `key` int(11) NULL,
+                `v1` decimal(27, 9) NULL,
+                `v2` varchar(30) NULL,
+                `v3` varchar(30) NULL,
+                `v4` date NULL,
+                `v5` datetime NULL,
+                `v6` float NULL,
+                `v7` datev2 NULL
+            ) ENGINE=OLAP
+            UNIQUE KEY(`key`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`key`) BUCKETS 1
+            PROPERTIES (
+                "enable_unique_key_merge_on_write" = "true",
+                "light_schema_change" = "true",
+                "row_store_columns" = "key,v1,v3,v5,v7",
+                "row_store_page_size" = "4096",
+                "replication_num" = "1"
+            );
+        """
+    } catch (Throwable t) {
+        Assertions.fail("examples in docs/table-design/row-store.md failed to exec, please fix it", t)
+    }
+}
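Row store mainly pays off for key point lookups; a sketch of the query shape tbl_point_query above is tuned for (the key value is illustrative):

    // sketch: unique-key point lookup; v1 and v3 are in row_store_columns,
    // so they can be read back without touching the columnar segments
    sql "SELECT `key`, v1, v3 FROM tbl_point_query WHERE `key` = 1"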
diff --git a/regression-test/suites/doc/table-design/schema-change.md.groovy b/regression-test/suites/doc/table-design/schema-change.md.groovy
new file mode 100644
index 00000000000..c23fc69f034
--- /dev/null
+++ b/regression-test/suites/doc/table-design/schema-change.md.groovy
@@ -0,0 +1,211 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.jupiter.api.Assertions;
+
+suite("docs/table-design/schema-change.md") {
+    try {
+        def waitUntilSchemaChangeDone = { tbl ->
+            waitForSchemaChangeDone({
+                sql " SHOW ALTER TABLE COLUMN FROM example_db WHERE TableName='${tbl}' ORDER BY createtime DESC LIMIT 1 "
+            })
+        }
+
+        multi_sql "create database if not exists example_db; use example_db; drop table if exists my_table;"
+        sql """
+            CREATE TABLE IF NOT EXISTS example_db.my_table(
+                col1 int,
+                col2 int,
+                col3 int,
+                col4 int,
+                col5 int
+            ) DUPLICATE KEY(col1, col2, col3)
+            DISTRIBUTED BY RANDOM BUCKETS 1
+            ROLLUP (
+                example_rollup_index (col1, col3, col4, col5)
+            )
+            PROPERTIES (
+                "replication_num" = "1"
+            )
+        """
+        sql """
+            ALTER TABLE example_db.my_table
+            ADD COLUMN new_key_col INT KEY DEFAULT "0" AFTER col1
+            TO example_rollup_index
+        """
+        waitUntilSchemaChangeDone("my_table")
+        sql """
+            ALTER TABLE example_db.my_table
+            ADD COLUMN new_val_col INT DEFAULT "0" AFTER col4
+            TO example_rollup_index
+        """
+        waitUntilSchemaChangeDone("my_table")
+
+        sql "drop table if exists example_db.my_table"
+        sql """
+            CREATE TABLE IF NOT EXISTS example_db.my_table(
+                col1 int,
+                col2 int,
+                col3 int,
+                col4 int SUM,
+                col5 int MAX
+            ) AGGREGATE KEY(col1, col2, col3)
+            DISTRIBUTED BY HASH(col1) BUCKETS 1
+            ROLLUP (
+                example_rollup_index (col1, col3, col4, col5)
+            )
+            PROPERTIES (
+                "replication_num" = "1"
+            )
+        """
+        sql """
+            ALTER TABLE example_db.my_table
+            ADD COLUMN new_key_col INT DEFAULT "0" AFTER col1
+            TO example_rollup_index
+        """
+        waitUntilSchemaChangeDone("my_table")
+        sql """
+            ALTER TABLE example_db.my_table
+            ADD COLUMN new_val_col INT SUM DEFAULT "0" AFTER col4
+            TO example_rollup_index
+        """
+        waitUntilSchemaChangeDone("my_table")
+
+        sql """
+            ALTER TABLE example_db.my_table
+            ADD COLUMN (c1 INT DEFAULT "1", c2 FLOAT SUM DEFAULT "0")
+            TO example_rollup_index
+        """
+        waitUntilSchemaChangeDone("my_table")
+
+        sql """
+            ALTER TABLE example_db.my_table
+            DROP COLUMN col3
+            FROM example_rollup_index
+        """
+        waitUntilSchemaChangeDone("my_table")
+
+
+        sql "drop table if exists example_db.my_table"
+        sql """
+            CREATE TABLE IF NOT EXISTS example_db.my_table(
+                col0 int,
+                col1 int DEFAULT "1",
+                col2 int,
+                col3 varchar(32),
+                col4 int SUM,
+                col5 varchar(32) REPLACE DEFAULT "abc"
+            ) AGGREGATE KEY(col0, col1, col2, col3)
+            DISTRIBUTED BY HASH(col0) BUCKETS 1
+            PROPERTIES (
+                "replication_num" = "1"
+            )
+        """
+        sql """
+            ALTER TABLE example_db.my_table
+            MODIFY COLUMN col1 BIGINT KEY DEFAULT "1" AFTER col2
+        """
+        waitUntilSchemaChangeDone("my_table")
+        sql """
+            ALTER TABLE example_db.my_table
+            MODIFY COLUMN col5 VARCHAR(64) REPLACE DEFAULT "abc"
+        """
+        waitUntilSchemaChangeDone("my_table")
+        sql """
+            ALTER TABLE example_db.my_table
+            MODIFY COLUMN col3 varchar(50) KEY NULL comment 'to 50'
+        """
+        waitUntilSchemaChangeDone("my_table")
+
+        sql "drop table if exists my_table"
+        sql """
+            CREATE TABLE IF NOT EXISTS example_db.my_table(
+                k1 int DEFAULT "1",
+                k2 int,
+                k3 varchar(32),
+                k4 date,
+                v1 int SUM,
+                v2 int MAX,
+            ) AGGREGATE KEY(k1, k2, k3, k4)
+            DISTRIBUTED BY HASH(k1) BUCKETS 1
+            ROLLUP (
+                example_rollup_index(k1, k2, k3, v1, v2)
+            )
+            PROPERTIES (
+                "replication_num" = "1"
+            )
+        """
+        sql """
+            ALTER TABLE example_db.my_table
+            ORDER BY (k3,k1,k2,v2,v1)
+            FROM example_rollup_index
+        """
+        waitUntilSchemaChangeDone("my_table")
+
+        sql "drop table if exists example_db.tbl1"
+        sql """
+            CREATE TABLE IF NOT EXISTS example_db.tbl1(
+                k1 int,
+                k2 int,
+                k3 int
+            ) AGGREGATE KEY(k1, k2, k3)
+            DISTRIBUTED BY HASH(k1) BUCKETS 1
+            ROLLUP (
+                rollup1 (k1, k2),
+                rollup2 (k2)
+            )
+            PROPERTIES (
+                "replication_num" = "1"
+            )
+        """
+        sql """
+            ALTER TABLE tbl1
+            ADD COLUMN k4 INT default "1" to rollup1,
+            ADD COLUMN k4 INT default "1" to rollup2,
+            ADD COLUMN k5 INT default "1" to rollup2
+        """
+        waitUntilSchemaChangeDone("tbl1")
+
+        sql "drop table if exists example_db.my_table"
+        sql """
+            CREATE TABLE IF NOT EXISTS example_db.my_table(
+                k1 int DEFAULT "1",
+                k2 int,
+                k3 varchar(32),
+                k4 date,
+                v1 int SUM,
+            ) AGGREGATE KEY(k1, k2, k3, k4)
+            DISTRIBUTED BY HASH(k1) BUCKETS 1
+            ROLLUP (
+                example_rollup_index(k1, k3, k2, v1)
+            )
+            PROPERTIES (
+                "replication_num" = "1"
+            )
+        """
+        sql """
+            ALTER TABLE example_db.my_table
+            ADD COLUMN v2 INT MAX DEFAULT "0" TO example_rollup_index,
+            ORDER BY (k3,k1,k2,v2,v1) FROM example_rollup_index
+        """
+        waitUntilSchemaChangeDone("my_table")
+
+        sql "SHOW ALTER TABLE COLUMN"
+    } catch (Throwable t) {
+        Assertions.fail("examples in docs/table-design/schema-change.md failed to exec, please fix it", t)
+    }
+}
diff --git a/regression-test/suites/doc/table-design/test_data/test.csv b/regression-test/suites/doc/table-design/test_data/test.csv
new file mode 100644
index 00000000000..c34e65603ab
--- /dev/null
+++ b/regression-test/suites/doc/table-design/test_data/test.csv
@@ -0,0 +1,2 @@
+Tom,40
+John,50

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org