This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris-website.git
The following commit(s) were added to refs/heads/master by this push: new 0806fd45467 add docs for row store (#1032) 0806fd45467 is described below commit 0806fd45467c03ad6425c7b5bb92c6e393a309b9 Author: Kang <kxiao.ti...@gmail.com> AuthorDate: Sun Sep 1 23:30:13 2024 +0800 add docs for row store (#1032) --- docs/table-design/row-store.md | 80 +++++++++++++++++++++ .../current/table-design/row-store.md | 82 ++++++++++++++++++++++ .../version-2.0/table-design/row-store.md | 66 +++++++++++++++++ .../version-2.1/table-design/row-store.md | 82 ++++++++++++++++++++++ .../version-3.0/table-design/row-store.md | 82 ++++++++++++++++++++++ sidebars.json | 1 + .../version-2.0/table-design/row-store.md | 66 +++++++++++++++++ .../version-2.1/table-design/row-store.md | 80 +++++++++++++++++++++ .../version-3.0/table-design/row-store.md | 80 +++++++++++++++++++++ versioned_sidebars/version-2.0-sidebars.json | 1 + versioned_sidebars/version-2.1-sidebars.json | 1 + versioned_sidebars/version-3.0-sidebars.json | 1 + 12 files changed, 622 insertions(+) diff --git a/docs/table-design/row-store.md b/docs/table-design/row-store.md new file mode 100644 index 00000000000..39fb3b6ec19 --- /dev/null +++ b/docs/table-design/row-store.md @@ -0,0 +1,80 @@ +--- +{ + "title": "Hybrid Storage", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# Hybrid Storage + +Doris defaults to columnar storage, where each column is stored contiguously. Columnar storage offers excellent performance for analytical scenarios (such as aggregation, filtering, sorting, etc.), as it only reads the necessary columns, reducing unnecessary IO. However, in point query scenarios (such as `SELECT *`), all columns need to be read, requiring an IO operation for each column, which can lead to IOPS becoming a bottleneck, especially for wide tables with many columns (e.g., hun [...] + +To address the IOPS bottleneck in point query scenarios, starting from version 2.0.0, Doris supports hybrid storage. When users create tables, they can specify whether to enable row storage. With row storage enabled, each row only requires one IO operation for point queries (such as `SELECT *`), significantly improving performance. + +## Syntax + +When creating a table, specify whether to enable row storage, which columns to enable row storage for, and the storage compression unit size page_size in the table's PROPERTIES. + +1. Whether to enable row storage: defaults to false (not enabled). +``` +"store_row_column" = "true" +``` + +2. Which columns to enable row storage for: if 1 is enabled, all columns are enabled by default. If you need to specify that only some columns are enabled for row storage, set the row_store_columns parameter, formatted as a comma-separated list of column names. +``` +"row_store_columns" = "column1,column2,column3" +``` + +3. Row storage page_size: defaults to 16KB. +``` +"row_store_page_size" = "16384" +``` + +The page is the smallest unit of storage read/write operations, and page_size is the size of the row storage page. This means that reading one row also requires generating an IO for a page. The larger the value, the better the compression effect and the lower the storage space usage, but the higher the IO overhead for point queries (since one IO reads at least one page), and vice versa. The smaller the value, the higher the storage space, the better the point query performance. The defau [...] + +## Example + +The example below creates an 8-column table, where "key,v1,v3,v5,v7" are the 5 columns enabled for row storage. To optimize for high-concurrency point query performance, the page_size is configured to 4KB. + +``` +CREATE TABLE `tbl_point_query` ( + `key` int(11) NULL, + `v1` decimal(27, 9) NULL, + `v2` varchar(30) NULL, + `v3` varchar(30) NULL, + `v4` date NULL, + `v5` datetime NULL, + `v6` float NULL, + `v7` datev2 NULL +) ENGINE=OLAP +UNIQUE KEY(`key`) +COMMENT 'OLAP' +DISTRIBUTED BY HASH(`key`) BUCKETS 1 +PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "row_store_columns" = "key,v1,v3,v5,v7", + "row_store_page_size" = "4096" +); +``` + +For more information on point query usage, please refer to [High-Concurrent Point Query](../query/high-concurrent-point-query). diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/table-design/row-store.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/table-design/row-store.md new file mode 100644 index 00000000000..6db3a09ad6d --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/table-design/row-store.md @@ -0,0 +1,82 @@ +--- +{ + "title": "行列混存", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# 行列混存 + +Doris 默认采用列式存储,每个列连续存储,在分析场景(如聚合,过滤,排序等)有很好的性能,因为只需要读取所需要的列减少不必要的 IO 。但是在点查场景(比如 `SELECT *`),需要读取所有列,每个列都需要一次 IO 导致 IOPS 成为瓶颈,特别对列多的宽表(比如上百列)尤为明显。 + +为了解决点查场景 IOPS 的瓶颈问题,Doris 2.0.0 版本开始支持行列混存,用户建表时指定开启行存后,点查(比如 `SELECT *`)每一行只需要一次 IO,性能有数量级提升。 + + +## 使用语法 + +建表时在表的 PROPERTIES 中指定是否开启行存,哪些列开启行存,行存的存储压缩单元大小 page_size。 + +1. 是否开启行存:默认为 false 不开启 +``` +"store_row_column" = "true" +``` + +2. 哪些列开启行存:如果1 开启,默认所有列开启行存,如果需要指定部分列开启行存,设置 row_store_columns 参数,格式为逗号分割的列名 +``` +"row_store_columns" = "column1,column2,column3" +``` + +3. 行存 page_size:默认为 16KB。 +``` +"row_store_page_size" = "16384" +``` + +page 是存储读写的最小单元,page_size 是行存 page 的大小,也就是说读一行也需要产生一个 page 的IO。这个值越大压缩效果越好存储空间占用越低,但是点查时IO开销越大性能越低(因为一次IO至少读一个page),反过来值越小存储空间约高,点查性能越好。默认值16KB是大多数情况下比较均衡的选择,如果更偏向查询性能可以配置较小的值比如 4KB 甚至更低,如果更偏向存储空间可以配置较大的值比如 64KB 甚至更高。 + + +## 使用实例 + +下面的例子创建一个 8 列的表,其中 "key,v1,v3,v5,v7" 这 5 列开启行存,为了高并发点查性能配置 page_size 为 4KB。 + +``` +CREATE TABLE `tbl_point_query` ( + `key` int(11) NULL, + `v1` decimal(27, 9) NULL, + `v2` varchar(30) NULL, + `v3` varchar(30) NULL, + `v4` date NULL, + `v5` datetime NULL, + `v6` float NULL, + `v7` datev2 NULL +) ENGINE=OLAP +UNIQUE KEY(`key`) +COMMENT 'OLAP' +DISTRIBUTED BY HASH(`key`) BUCKETS 1 +PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "row_store_columns" = "key,v1,v3,v5,v7", + "row_store_page_size" = "4096" +); +``` + +更多点查的使用请参考 [高并发点查](../query/high-concurrent-point-query) 。 diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/table-design/row-store.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/table-design/row-store.md new file mode 100644 index 00000000000..9dc07066675 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0/table-design/row-store.md @@ -0,0 +1,66 @@ +--- +{ + "title": "行列混存", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# 行列混存 + +Doris 默认采用列式存储,每个列连续存储,在分析场景(如聚合,过滤,排序等)有很好的性能,因为只需要读取所需要的列减少不必要的 IO 。但是在点查场景(比如 `SELECT *`),需要读取所有列,每个列都需要一次 IO 导致 IOPS 成为瓶颈,特别对列多的宽表(比如上百列)尤为明显。 + +为了解决点查场景 IOPS 的瓶颈问题,Doris 2.0.0 版本开始支持行列混存,用户建表时指定开启行存后,点查(比如 `SELECT *`)每一行只需要一次 IO,性能有数量级提升。 + + +## 使用语法 + +建表时在表的 PROPERTIES 中指定是否开启行存,默认为 false 不开启 +``` +"store_row_column" = "true" +``` + +## 使用实例 + +下面的例子创建一个 8 列的表,开启行存。 + +``` +CREATE TABLE `tbl_point_query` ( + `key` int(11) NULL, + `v1` decimal(27, 9) NULL, + `v2` varchar(30) NULL, + `v3` varchar(30) NULL, + `v4` date NULL, + `v5` datetime NULL, + `v6` float NULL, + `v7` datev2 NULL +) ENGINE=OLAP +UNIQUE KEY(`key`) +COMMENT 'OLAP' +DISTRIBUTED BY HASH(`key`) BUCKETS 1 +PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "store_row_column" = "true" +); +``` + +更多点查的使用请参考 [高并发点查](../query/high-concurrent-point-query) 。 diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/table-design/row-store.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/table-design/row-store.md new file mode 100644 index 00000000000..6db3a09ad6d --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/table-design/row-store.md @@ -0,0 +1,82 @@ +--- +{ + "title": "行列混存", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# 行列混存 + +Doris 默认采用列式存储,每个列连续存储,在分析场景(如聚合,过滤,排序等)有很好的性能,因为只需要读取所需要的列减少不必要的 IO 。但是在点查场景(比如 `SELECT *`),需要读取所有列,每个列都需要一次 IO 导致 IOPS 成为瓶颈,特别对列多的宽表(比如上百列)尤为明显。 + +为了解决点查场景 IOPS 的瓶颈问题,Doris 2.0.0 版本开始支持行列混存,用户建表时指定开启行存后,点查(比如 `SELECT *`)每一行只需要一次 IO,性能有数量级提升。 + + +## 使用语法 + +建表时在表的 PROPERTIES 中指定是否开启行存,哪些列开启行存,行存的存储压缩单元大小 page_size。 + +1. 是否开启行存:默认为 false 不开启 +``` +"store_row_column" = "true" +``` + +2. 哪些列开启行存:如果1 开启,默认所有列开启行存,如果需要指定部分列开启行存,设置 row_store_columns 参数,格式为逗号分割的列名 +``` +"row_store_columns" = "column1,column2,column3" +``` + +3. 行存 page_size:默认为 16KB。 +``` +"row_store_page_size" = "16384" +``` + +page 是存储读写的最小单元,page_size 是行存 page 的大小,也就是说读一行也需要产生一个 page 的IO。这个值越大压缩效果越好存储空间占用越低,但是点查时IO开销越大性能越低(因为一次IO至少读一个page),反过来值越小存储空间约高,点查性能越好。默认值16KB是大多数情况下比较均衡的选择,如果更偏向查询性能可以配置较小的值比如 4KB 甚至更低,如果更偏向存储空间可以配置较大的值比如 64KB 甚至更高。 + + +## 使用实例 + +下面的例子创建一个 8 列的表,其中 "key,v1,v3,v5,v7" 这 5 列开启行存,为了高并发点查性能配置 page_size 为 4KB。 + +``` +CREATE TABLE `tbl_point_query` ( + `key` int(11) NULL, + `v1` decimal(27, 9) NULL, + `v2` varchar(30) NULL, + `v3` varchar(30) NULL, + `v4` date NULL, + `v5` datetime NULL, + `v6` float NULL, + `v7` datev2 NULL +) ENGINE=OLAP +UNIQUE KEY(`key`) +COMMENT 'OLAP' +DISTRIBUTED BY HASH(`key`) BUCKETS 1 +PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "row_store_columns" = "key,v1,v3,v5,v7", + "row_store_page_size" = "4096" +); +``` + +更多点查的使用请参考 [高并发点查](../query/high-concurrent-point-query) 。 diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/table-design/row-store.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/table-design/row-store.md new file mode 100644 index 00000000000..6db3a09ad6d --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-3.0/table-design/row-store.md @@ -0,0 +1,82 @@ +--- +{ + "title": "行列混存", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# 行列混存 + +Doris 默认采用列式存储,每个列连续存储,在分析场景(如聚合,过滤,排序等)有很好的性能,因为只需要读取所需要的列减少不必要的 IO 。但是在点查场景(比如 `SELECT *`),需要读取所有列,每个列都需要一次 IO 导致 IOPS 成为瓶颈,特别对列多的宽表(比如上百列)尤为明显。 + +为了解决点查场景 IOPS 的瓶颈问题,Doris 2.0.0 版本开始支持行列混存,用户建表时指定开启行存后,点查(比如 `SELECT *`)每一行只需要一次 IO,性能有数量级提升。 + + +## 使用语法 + +建表时在表的 PROPERTIES 中指定是否开启行存,哪些列开启行存,行存的存储压缩单元大小 page_size。 + +1. 是否开启行存:默认为 false 不开启 +``` +"store_row_column" = "true" +``` + +2. 哪些列开启行存:如果1 开启,默认所有列开启行存,如果需要指定部分列开启行存,设置 row_store_columns 参数,格式为逗号分割的列名 +``` +"row_store_columns" = "column1,column2,column3" +``` + +3. 行存 page_size:默认为 16KB。 +``` +"row_store_page_size" = "16384" +``` + +page 是存储读写的最小单元,page_size 是行存 page 的大小,也就是说读一行也需要产生一个 page 的IO。这个值越大压缩效果越好存储空间占用越低,但是点查时IO开销越大性能越低(因为一次IO至少读一个page),反过来值越小存储空间约高,点查性能越好。默认值16KB是大多数情况下比较均衡的选择,如果更偏向查询性能可以配置较小的值比如 4KB 甚至更低,如果更偏向存储空间可以配置较大的值比如 64KB 甚至更高。 + + +## 使用实例 + +下面的例子创建一个 8 列的表,其中 "key,v1,v3,v5,v7" 这 5 列开启行存,为了高并发点查性能配置 page_size 为 4KB。 + +``` +CREATE TABLE `tbl_point_query` ( + `key` int(11) NULL, + `v1` decimal(27, 9) NULL, + `v2` varchar(30) NULL, + `v3` varchar(30) NULL, + `v4` date NULL, + `v5` datetime NULL, + `v6` float NULL, + `v7` datev2 NULL +) ENGINE=OLAP +UNIQUE KEY(`key`) +COMMENT 'OLAP' +DISTRIBUTED BY HASH(`key`) BUCKETS 1 +PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "row_store_columns" = "key,v1,v3,v5,v7", + "row_store_page_size" = "4096" +); +``` + +更多点查的使用请参考 [高并发点查](../query/high-concurrent-point-query) 。 diff --git a/sidebars.json b/sidebars.json index 9ac06de4256..24893274295 100644 --- a/sidebars.json +++ b/sidebars.json @@ -109,6 +109,7 @@ "table-design/data-model/tips" ] }, + "table-design/row-store", "table-design/data-partition", "table-design/schema-change", { diff --git a/versioned_docs/version-2.0/table-design/row-store.md b/versioned_docs/version-2.0/table-design/row-store.md new file mode 100644 index 00000000000..33719d20661 --- /dev/null +++ b/versioned_docs/version-2.0/table-design/row-store.md @@ -0,0 +1,66 @@ +--- +{ + "title": "Hybrid Storage", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# Hybrid Storage + +Doris defaults to columnar storage, where each column is stored contiguously. Columnar storage offers excellent performance for analytical scenarios (such as aggregation, filtering, sorting, etc.), as it only reads the necessary columns, reducing unnecessary IO. However, in point query scenarios (such as `SELECT *`), all columns need to be read, requiring an IO operation for each column, which can lead to IOPS becoming a bottleneck, especially for wide tables with many columns (e.g., hun [...] + +To address the IOPS bottleneck in point query scenarios, starting from version 2.0.0, Doris supports hybrid storage. When users create tables, they can specify whether to enable row storage. With row storage enabled, each row only requires one IO operation for point queries (such as `SELECT *`), significantly improving performance. + +## Syntax + +When creating a table, specify whether to enable row storage, defaults to false (not enabled). +``` +"store_row_column" = "true" +``` + + +## Example + +The example below creates an 8-column table, where row storage is enabled. + +``` +CREATE TABLE `tbl_point_query` ( + `key` int(11) NULL, + `v1` decimal(27, 9) NULL, + `v2` varchar(30) NULL, + `v3` varchar(30) NULL, + `v4` date NULL, + `v5` datetime NULL, + `v6` float NULL, + `v7` datev2 NULL +) ENGINE=OLAP +UNIQUE KEY(`key`) +COMMENT 'OLAP' +DISTRIBUTED BY HASH(`key`) BUCKETS 1 +PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "store_row_column" = "true" +); +``` + +For more information on point query usage, please refer to [High-Concurrent Point Query](../query/high-concurrent-point-query). diff --git a/versioned_docs/version-2.1/table-design/row-store.md b/versioned_docs/version-2.1/table-design/row-store.md new file mode 100644 index 00000000000..39fb3b6ec19 --- /dev/null +++ b/versioned_docs/version-2.1/table-design/row-store.md @@ -0,0 +1,80 @@ +--- +{ + "title": "Hybrid Storage", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# Hybrid Storage + +Doris defaults to columnar storage, where each column is stored contiguously. Columnar storage offers excellent performance for analytical scenarios (such as aggregation, filtering, sorting, etc.), as it only reads the necessary columns, reducing unnecessary IO. However, in point query scenarios (such as `SELECT *`), all columns need to be read, requiring an IO operation for each column, which can lead to IOPS becoming a bottleneck, especially for wide tables with many columns (e.g., hun [...] + +To address the IOPS bottleneck in point query scenarios, starting from version 2.0.0, Doris supports hybrid storage. When users create tables, they can specify whether to enable row storage. With row storage enabled, each row only requires one IO operation for point queries (such as `SELECT *`), significantly improving performance. + +## Syntax + +When creating a table, specify whether to enable row storage, which columns to enable row storage for, and the storage compression unit size page_size in the table's PROPERTIES. + +1. Whether to enable row storage: defaults to false (not enabled). +``` +"store_row_column" = "true" +``` + +2. Which columns to enable row storage for: if 1 is enabled, all columns are enabled by default. If you need to specify that only some columns are enabled for row storage, set the row_store_columns parameter, formatted as a comma-separated list of column names. +``` +"row_store_columns" = "column1,column2,column3" +``` + +3. Row storage page_size: defaults to 16KB. +``` +"row_store_page_size" = "16384" +``` + +The page is the smallest unit of storage read/write operations, and page_size is the size of the row storage page. This means that reading one row also requires generating an IO for a page. The larger the value, the better the compression effect and the lower the storage space usage, but the higher the IO overhead for point queries (since one IO reads at least one page), and vice versa. The smaller the value, the higher the storage space, the better the point query performance. The defau [...] + +## Example + +The example below creates an 8-column table, where "key,v1,v3,v5,v7" are the 5 columns enabled for row storage. To optimize for high-concurrency point query performance, the page_size is configured to 4KB. + +``` +CREATE TABLE `tbl_point_query` ( + `key` int(11) NULL, + `v1` decimal(27, 9) NULL, + `v2` varchar(30) NULL, + `v3` varchar(30) NULL, + `v4` date NULL, + `v5` datetime NULL, + `v6` float NULL, + `v7` datev2 NULL +) ENGINE=OLAP +UNIQUE KEY(`key`) +COMMENT 'OLAP' +DISTRIBUTED BY HASH(`key`) BUCKETS 1 +PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "row_store_columns" = "key,v1,v3,v5,v7", + "row_store_page_size" = "4096" +); +``` + +For more information on point query usage, please refer to [High-Concurrent Point Query](../query/high-concurrent-point-query). diff --git a/versioned_docs/version-3.0/table-design/row-store.md b/versioned_docs/version-3.0/table-design/row-store.md new file mode 100644 index 00000000000..39fb3b6ec19 --- /dev/null +++ b/versioned_docs/version-3.0/table-design/row-store.md @@ -0,0 +1,80 @@ +--- +{ + "title": "Hybrid Storage", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# Hybrid Storage + +Doris defaults to columnar storage, where each column is stored contiguously. Columnar storage offers excellent performance for analytical scenarios (such as aggregation, filtering, sorting, etc.), as it only reads the necessary columns, reducing unnecessary IO. However, in point query scenarios (such as `SELECT *`), all columns need to be read, requiring an IO operation for each column, which can lead to IOPS becoming a bottleneck, especially for wide tables with many columns (e.g., hun [...] + +To address the IOPS bottleneck in point query scenarios, starting from version 2.0.0, Doris supports hybrid storage. When users create tables, they can specify whether to enable row storage. With row storage enabled, each row only requires one IO operation for point queries (such as `SELECT *`), significantly improving performance. + +## Syntax + +When creating a table, specify whether to enable row storage, which columns to enable row storage for, and the storage compression unit size page_size in the table's PROPERTIES. + +1. Whether to enable row storage: defaults to false (not enabled). +``` +"store_row_column" = "true" +``` + +2. Which columns to enable row storage for: if 1 is enabled, all columns are enabled by default. If you need to specify that only some columns are enabled for row storage, set the row_store_columns parameter, formatted as a comma-separated list of column names. +``` +"row_store_columns" = "column1,column2,column3" +``` + +3. Row storage page_size: defaults to 16KB. +``` +"row_store_page_size" = "16384" +``` + +The page is the smallest unit of storage read/write operations, and page_size is the size of the row storage page. This means that reading one row also requires generating an IO for a page. The larger the value, the better the compression effect and the lower the storage space usage, but the higher the IO overhead for point queries (since one IO reads at least one page), and vice versa. The smaller the value, the higher the storage space, the better the point query performance. The defau [...] + +## Example + +The example below creates an 8-column table, where "key,v1,v3,v5,v7" are the 5 columns enabled for row storage. To optimize for high-concurrency point query performance, the page_size is configured to 4KB. + +``` +CREATE TABLE `tbl_point_query` ( + `key` int(11) NULL, + `v1` decimal(27, 9) NULL, + `v2` varchar(30) NULL, + `v3` varchar(30) NULL, + `v4` date NULL, + `v5` datetime NULL, + `v6` float NULL, + `v7` datev2 NULL +) ENGINE=OLAP +UNIQUE KEY(`key`) +COMMENT 'OLAP' +DISTRIBUTED BY HASH(`key`) BUCKETS 1 +PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "row_store_columns" = "key,v1,v3,v5,v7", + "row_store_page_size" = "4096" +); +``` + +For more information on point query usage, please refer to [High-Concurrent Point Query](../query/high-concurrent-point-query). diff --git a/versioned_sidebars/version-2.0-sidebars.json b/versioned_sidebars/version-2.0-sidebars.json index 0f74ff5fd5f..8c3d0cfea75 100644 --- a/versioned_sidebars/version-2.0-sidebars.json +++ b/versioned_sidebars/version-2.0-sidebars.json @@ -65,6 +65,7 @@ "table-design/data-model/tips" ] }, + "table-design/row-store", "table-design/data-partition", "table-design/schema-change", "table-design/cold-hot-separation", diff --git a/versioned_sidebars/version-2.1-sidebars.json b/versioned_sidebars/version-2.1-sidebars.json index ca0b069114c..eca419f6f31 100644 --- a/versioned_sidebars/version-2.1-sidebars.json +++ b/versioned_sidebars/version-2.1-sidebars.json @@ -66,6 +66,7 @@ "table-design/data-model/tips" ] }, + "table-design/row-store", "table-design/data-partition", "table-design/schema-change", "table-design/cold-hot-separation", diff --git a/versioned_sidebars/version-3.0-sidebars.json b/versioned_sidebars/version-3.0-sidebars.json index 424ad09d360..adacb33ac35 100644 --- a/versioned_sidebars/version-3.0-sidebars.json +++ b/versioned_sidebars/version-3.0-sidebars.json @@ -97,6 +97,7 @@ "table-design/data-model/tips" ] }, + "table-design/row-store", "table-design/data-partition", "table-design/schema-change", "table-design/cold-hot-separation", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org