This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 3ef1229635fb51750b9959dc7f6f5caf09fd4d8d Author: zhengyu <freeman.zhang1...@gmail.com> AuthorDate: Thu Jan 11 20:57:13 2024 +0800 [docs](query-accel) refine several statements in docs (#29716) --- docs/en/docs/query-acceleration/nereids.md | 4 +- docs/en/docs/query-acceleration/statistics.md | 8 ++-- docs/zh-CN/docs/query-acceleration/nereids.md | 4 +- .../pipeline-execution-engine.md | 4 +- docs/zh-CN/docs/query-acceleration/statistics.md | 6 +-- docs/zh-CN/docs/releasenotes/release-2.0-Beta.md | 6 +-- docs/zh-CN/docs/releasenotes/release-2.0.0.md | 10 ++--- .../java/org/apache/doris/statistics/README.md | 46 +++++++++++----------- 8 files changed, 44 insertions(+), 44 deletions(-) diff --git a/docs/en/docs/query-acceleration/nereids.md b/docs/en/docs/query-acceleration/nereids.md index 6e1724fc8c7..2e09e307891 100644 --- a/docs/en/docs/query-acceleration/nereids.md +++ b/docs/en/docs/query-acceleration/nereids.md @@ -5,7 +5,7 @@ } --- -<!-- +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -66,7 +66,7 @@ Turn on auto fall back to legacy planner SET enable_fallback_to_original_planner=true; ``` -Recommand execute analyze on table before query on it to get the benefits of cbo +Executing analyze on table before query is highly recommended when query performance is critical so that we can fully utilize Nereids's CBO capability. ## Known issues and temporarily unsupported features diff --git a/docs/en/docs/query-acceleration/statistics.md b/docs/en/docs/query-acceleration/statistics.md index a70d6287a3c..667db64e3af 100644 --- a/docs/en/docs/query-acceleration/statistics.md +++ b/docs/en/docs/query-acceleration/statistics.md @@ -5,7 +5,7 @@ } --- -<!-- +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -52,7 +52,7 @@ Doris allows users to manually trigger the collection and update of statistics b Syntax: ```SQL -ANALYZE < TABLE | DATABASE table_name | db_name > +ANALYZE < TABLE table_name | DATABASE db_name > [ (column_name [, ...]) ] [ [ WITH SYNC ] [ WITH SAMPLE PERCENT | ROWS ] ]; ``` @@ -146,7 +146,7 @@ mysql> show analyze 245073\G; col_name: [l_returnflag,l_receiptdate,l_tax,l_shipmode,l_suppkey,l_shipdate,l_commitdate,l_partkey,l_orderkey,l_quantity,l_linestatus,l_comment,l_extendedprice,l_linenumber,l_discount,l_shipinstruct] job_type: MANUAL analysis_type: FUNDAMENTALS - message: + message: last_exec_time_in_ms: 2023-11-07 11:00:52 state: FINISHED progress: 16 Finished | 0 Failed | 0 In Progress | 16 Total @@ -326,4 +326,4 @@ Users can use `SHOW BACKENDS\G` to verify the BE (Backend) status. If the BE sta ### 4.2 Failure of ANALYZE on Large Tables -Due to resource limitations, ANALYZE on some large tables may timeout or exceed BE memory limits. In such cases, it is recommended to use `ANALYZE ... WITH SAMPLE...`. +Due to resource limitations, ANALYZE on some large tables may timeout or exceed BE memory limits. In such cases, it is recommended to use `ANALYZE ... WITH SAMPLE...`. diff --git a/docs/zh-CN/docs/query-acceleration/nereids.md b/docs/zh-CN/docs/query-acceleration/nereids.md index 1f0489fa23b..ea120322fdf 100644 --- a/docs/zh-CN/docs/query-acceleration/nereids.md +++ b/docs/zh-CN/docs/query-acceleration/nereids.md @@ -5,7 +5,7 @@ } --- -<!-- +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -66,7 +66,7 @@ SET enable_nereids_planner=true; SET enable_fallback_to_original_planner=true; ``` -为了能够充分利用新优化器的CBO能力,强烈建议对关注性能查询所以来的表,执行analyze语句,以收集列统计信息 +为了能够充分利用新优化器的CBO能力,强烈建议对查询延迟敏感的表,执行analyze语句,以收集列统计信息。 ## 已知问题和暂不支持的功能 diff --git a/docs/zh-CN/docs/query-acceleration/pipeline-execution-engine.md b/docs/zh-CN/docs/query-acceleration/pipeline-execution-engine.md index bbbe3e100e4..94e742c208a 100644 --- a/docs/zh-CN/docs/query-acceleration/pipeline-execution-engine.md +++ b/docs/zh-CN/docs/query-acceleration/pipeline-execution-engine.md @@ -7,7 +7,7 @@ } --- -<!-- +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -71,7 +71,7 @@ set enable_pipeline_engine = true; #### parallel_pipeline_task_num -`parallel_pipeline_task_num`代表了 SQL 查询进行查询并发的 Pipeline Task 数目。Doris默认的配置为`0`,即CPU核数的一半。用户也可以实际根据自己的实际情况进行调整。 +`parallel_pipeline_task_num`代表了 SQL 查询进行查询并发的 Pipeline Task 数目。Doris默认的配置为`0`,即CPU核数的一半。用户也可以根据自己的实际情况进行调整。 ``` set parallel_pipeline_task_num = 0; diff --git a/docs/zh-CN/docs/query-acceleration/statistics.md b/docs/zh-CN/docs/query-acceleration/statistics.md index 3c478730bd9..882b2cebba0 100644 --- a/docs/zh-CN/docs/query-acceleration/statistics.md +++ b/docs/zh-CN/docs/query-acceleration/statistics.md @@ -5,7 +5,7 @@ } --- -<!-- +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -54,7 +54,7 @@ Doris支持用户通过提交ANALYZE语句来手动触发统计信息的收集 语法: ```SQL -ANALYZE < TABLE | DATABASE table_name | db_name > +ANALYZE < TABLE table_name | DATABASE db_name > [ (column_name [, ...]) ] [ [ WITH SYNC ] [ WITH SAMPLE PERCENT | ROWS ] ]; ``` @@ -147,7 +147,7 @@ mysql> show analyze 245073\G; col_name: [l_returnflag,l_receiptdate,l_tax,l_shipmode,l_suppkey,l_shipdate,l_commitdate,l_partkey,l_orderkey,l_quantity,l_linestatus,l_comment,l_extendedprice,l_linenumber,l_discount,l_shipinstruct] job_type: MANUAL analysis_type: FUNDAMENTALS - message: + message: last_exec_time_in_ms: 2023-11-07 11:00:52 state: FINISHED progress: 16 Finished | 0 Failed | 0 In Progress | 16 Total diff --git a/docs/zh-CN/docs/releasenotes/release-2.0-Beta.md b/docs/zh-CN/docs/releasenotes/release-2.0-Beta.md index e54c253b877..9db268c6502 100644 --- a/docs/zh-CN/docs/releasenotes/release-2.0-Beta.md +++ b/docs/zh-CN/docs/releasenotes/release-2.0-Beta.md @@ -27,7 +27,7 @@ under the License. 亲爱的社区小伙伴们,我们很高兴地向大家宣布,Apache Doris 2.0-Beta 版本已于 2023 年 7 月 3 日正式发布!**在 2.0-Beta 版本中有超过 255 位贡献者为 Apache Doris 提交了超过 3500 个优化与修复**,欢迎大家下载使用! > 下载链接:[https://doris.apache.org/download](https://doris.apache.org/download) -> +> > GitHub > 源码:[https://github.com/apache/doris/tree/branch-2.0](https://github.com/apache/doris/tree/branch-2.0) @@ -50,7 +50,7 @@ under the License. 秉持着“**将易用性留给用户、将复杂性留给自己**”的原则,为了克服以上一系列挑战,从理论基础到工程实现、从理想业务场景到极端异常 Case、从内部测试通过到大规模生产可用,我们耗费了更多的时间与精力在功能的开发、验证、持续迭代与精益求精上。值得庆祝的是,在经过近半年的开发、测试与稳定性调优后,Apache Doris 终于迎来了 2.0-Beta 版本的正式发布!而这一版本的成功发布也使得我们的愿景离现实更进一步! - + # 盲测性能 10 倍以上提升! @@ -91,7 +91,7 @@ under the License. 参考文档:[https://doris.apache.org/zh-CN/docs/dev/query-acceleration/pipeline-execution-engine](https://doris.apache.org/zh-CN/docs/dev/query-acceleration/pipeline-execution-engine) -如何开启:` Set enable_pipeline_engine = true `该功能在 Apache Doris 2.0 版本中将默认开启,BE 在进行查询执行时默认将 SQL 的执行模型转变 Pipeline 的执行方式。`parallel_pipeline_task_num`代表了 SQL 查询进行查询并发的 Pipeline Task 数目。Apache Doris 默认配置为`0`,此时 Apache Doris 会自动感知每个 BE 的 CPU 核数并把并发度设置为 CPU 核数的一半,用户也可以实际根据自己的实际情况进行调整。对于从老版本升级的用户,建议用户将该参数设置成老版本中`parallel_fragment_exec_instance_num`的值。 +如何开启:` Set enable_pipeline_engine = true `该功能在 Apache Doris 2.0 版本中将默认开启,BE 在进行查询执行时默认将 SQL 的执行模型转变 Pipeline 的执行方式。`parallel_pipeline_task_num`代表了 SQL 查询进行查询并发的 Pipeline Task 数目。Apache Doris 默认配置为`0`,此时 Apache Doris 会自动感知每个 BE 的 CPU 核数并把并发度设置为 CPU 核数的一半,用户也可以根据自己的实际情况进行调整。对于从老版本升级的用户,建议用户将该参数设置成老版本中`parallel_fragment_exec_instance_num`的值。 # 查询稳定性进一步提升 diff --git a/docs/zh-CN/docs/releasenotes/release-2.0.0.md b/docs/zh-CN/docs/releasenotes/release-2.0.0.md index 1a40bf998c7..0e982adcaa8 100644 --- a/docs/zh-CN/docs/releasenotes/release-2.0.0.md +++ b/docs/zh-CN/docs/releasenotes/release-2.0.0.md @@ -29,9 +29,9 @@ under the License. 在 2.0.0 版本中,Apache Doris 在标准 Benchmark 数据集上盲测查询性能得到超过 10 倍的提升、在日志分析和数据湖联邦分析场景能力得到全面加强、数据更新效率和写入效率都更加高效稳定、支持了更加完善的多租户和资源隔离机制、在资源弹性与存算分离方向踏上了新的台阶、增加了一系列面向企业用户的易用性特性。在经过近半年的开发、测试与稳定性调优后,这一版本已经正式稳定可用,欢迎大家下载使用! > 下载链接:[https://doris.apache.org/download](https://doris.apache.org/download) -> +> > GitHub > 源码:[https://github.com/apache/doris/tree/2.0.0-rc04](https://github.com/apache/doris/tree/2.0.0-rc04) - + # 盲测性能 10 倍以上提升! @@ -73,7 +73,7 @@ under the License. 如何开启:` Set enable_pipeline_engine = true ` - 该功能在 Apache Doris 2.0 版本中将默认开启,BE 在进行查询执行时默认将 SQL 的执行模型转变 Pipeline 的执行方式。 -- `parallel_pipeline_task_num`代表了 SQL 查询进行查询并发的 Pipeline Task 数目。Apache Doris 默认配置为`0`,此时 Apache Doris 会自动感知每个 BE 的 CPU 核数并把并发度设置为 CPU 核数的一半,用户也可以实际根据自己的实际情况进行调整。 +- `parallel_pipeline_task_num`代表了 SQL 查询进行查询并发的 Pipeline Task 数目。Apache Doris 默认配置为`0`,此时 Apache Doris 会自动感知每个 BE 的 CPU 核数并把并发度设置为 CPU 核数的一半,用户也可以根据自己的实际情况进行调整。 - 对于从老版本升级的用户,系统自动将该参数设置成老版本中`parallel_fragment_exec_instance_num`的值。 # 更统一多样的分析场景 @@ -210,7 +210,7 @@ under the License. # 正式踏上 2.0 之旅 -在 Apache Doris 2.0.0 版本发布过程中,我们邀请了数百家企业参与新版本的打磨,力求为所有用户提供性能更佳、稳定性更高、易用性更好的数据分析体验。后续我们将会持续敏捷发版来响应所有用户对功能和稳定性的更高追求,预计 2.0 系列的第一个迭代版本 2.0.1 将于 8 月下旬发布,9 月会进一步发布 2.0.2 版本。在快速 Bugfix 的同时,也会不断将一些最新特性加入到新版本中。9 月份我们还将发布 2.1 版本的尝鲜版本,会增加一系列呼声已久的新能力,包括 Variant 可变数据类型以更好满足半结构化数据 Schema Free 的分析需求,多表物化视图,在导入性能方面持续优化、增加新的更加简洁的数据导入方式,通过自动攒批实现更加实时的数据写入,复合数据类型的嵌套能力等。 +在 Apache Doris 2.0.0 版本发布过程中,我们邀请了数百家企业参与新版本的打磨,力求为所有用户提供性能更佳、稳定性更高、易用性更好的数据分析体验。后续我们将会持续敏捷发版来响应所有用户对功能和稳定性的更高追求,预计 2.0 系列的第一个迭代版本 2.0.1 将于 8 月下旬发布,9 月会进一步发布 2.0.2 版本。在快速 Bugfix 的同时,也会不断将一些最新特性加入到新版本中。9 月份我们还将发布 2.1 版本的尝鲜版本,会增加一系列呼声已久的新能力,包括 Variant 可变数据类型以更好满足半结构化数据 Schema Free 的分析需求,多表物化视图,在导入性能方面持续优化、增加新的更加简洁的数据导入方式,通过自动攒批实现更加实时的数据写入,复合数据类型的嵌套能力等。 期待 Apache Doris 2.0 版本的正式发布为更多社区用户提供实时统一的分析体验,我们也相信 Apache Doris 2.0 版本会成为您在实时分析场景中的最理想选择。 @@ -218,4 +218,4 @@ under the License. 再次向所有参与 Apache Doris 2.0.0 版本开发和测试的贡献者们表示最衷心的感谢,他们分别是: -0xflotus、1330571、15767714253、924060929、ArmandoZ、AshinGau、BBB-source、BePPPower、Bears0haunt、BiteTheDDDDt、ByteYue、Cai-Yao、CalvinKirs、Centurybbx、ChaseHuangxu、CodeCooker17、DarvenDua、Dazhuwei、DongLiang-0、EvanTheBoy、FreeOnePlus、Gabriel39、GoGoWen、HHoflittlefish777、HackToday、HappenLee、Henry2SS、HonestManXin、JNSimba、JackDrogon、Jake-00、Jenson97Jibing-Li、Johnnyssc、JoverZhang、KassieZ、Kikyou1997、Larborator、Lchangliang、LemonLiTree、LiBinfeng-01、MRYOG、Mellorsssss、Moonm3n、Mryange、Myasuka、NetShrimp06、Remini [...] \ No newline at end of file +0xflotus、1330571、15767714253、924060929、ArmandoZ、AshinGau、BBB-source、BePPPower、Bears0haunt、BiteTheDDDDt、ByteYue、Cai-Yao、CalvinKirs、Centurybbx、ChaseHuangxu、CodeCooker17、DarvenDua、Dazhuwei、DongLiang-0、EvanTheBoy、FreeOnePlus、Gabriel39、GoGoWen、HHoflittlefish777、HackToday、HappenLee、Henry2SS、HonestManXin、JNSimba、JackDrogon、Jake-00、Jenson97Jibing-Li、Johnnyssc、JoverZhang、KassieZ、Kikyou1997、Larborator、Lchangliang、LemonLiTree、LiBinfeng-01、MRYOG、Mellorsssss、Moonm3n、Mryange、Myasuka、NetShrimp06、Remini [...] diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/README.md b/fe/fe-core/src/main/java/org/apache/doris/statistics/README.md index ef9340b2817..f9ae316611a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/README.md +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/README.md @@ -44,8 +44,8 @@ There may be compatibility issues if there are changes to the schema of the stat |---|---| |AnalyzeStmt|Constructed by parsing user-input SQL, each AnalyzeStmt corresponds to a Job, and a Job can have multiple Tasks, with each Task responsible for collecting statistics information on a column.| |AnalysisManager|Mainly responsible for managing Analyze Jobs/Tasks, including creation, execution, cancellation, and status updates, etc.| -|StatisticCache|The collected statistical information is cached here on demand.| -|StatisticCacheLoader|When `StatsCalculator#computeScan` fails to find the corresponding stats for a column in the cache, the load logic will be triggered, which is implemented in this class.| +|StatisticsCache|The collected statistical information is cached here on demand.| +|StatisticsCacheLoader|When `StatsCalculator#computeScan` fails to find the corresponding stats for a column in the cache, the load logic will be triggered, which is implemented in this class.| |AnalysisTaskExecutor|Used to excute AnalyzeJob| |AnalysisTaskWrapper|This class encapsulates an `AnalysisTask` and extends `FutureTask`. It overrides some methods for state updates.| |AnalysisTaskScheduler|AnalysisTaskExecutor retrieves jobs from here for execution. Manually submitted jobs always have higher priority than automatically triggered ones.| @@ -72,7 +72,7 @@ else is async task AnalysisTaskExecutor->>ThreadPoolExecutor: submit(AnalysisTaskWrapper) ThreadPoolExecutor->>AnalysisTaskWrapper: run AnalysisTaskWrapper->>BE: collect && write - AnalysisTaskWrapper->>StatisticCache: refresh + AnalysisTaskWrapper->>StatisticsCache: refresh AnalysisTaskWrapper->>AnalysisManager: updateTaskStatus alt is all task finished AnalysisManager->> StatisticsUtil: execUpdate mark job finished @@ -85,25 +85,25 @@ end ```mermaid sequenceDiagram -StatsCalculator->>StatisticCache: get +StatsCalculator->>StatisticsCache: get alt is cached - StatisticCache->>StatsCalculator: return cached stats + StatisticsCache->>StatsCalculator: return cached stats else not cached - StatisticCache->>StatsCalculator: return UNKNOWN stats - StatisticCache->>ThreadPoolExecutor: submit load task + StatisticsCache->>StatsCalculator: return UNKNOWN stats + StatisticsCache->>ThreadPoolExecutor: submit load task ThreadPoolExecutor->>AsyncTask: get AsyncTask->>StatisticsUtil: execStatisticQuery alt exception occurred: - AsyncTask->>StatisticCache: return UNKNOWN stats - StatisticCache->> StatisticCache: cache UNKNOWN for the column + AsyncTask->>StatisticsCache: return UNKNOWN stats + StatisticsCache->> StatisticsCache: cache UNKNOWN for the column else no exception: StatisticsUtil->>AsyncTask: Return results rows AsyncTask->>StatisticsUtil: deserializeToColumnStatistics(result rows) alt exception occurred: - AsyncTask->>StatisticCache: return UNKNOWN stats - StatisticCache->> StatisticCache: cache UNKNOWN for the column + AsyncTask->>StatisticsCache: return UNKNOWN stats + StatisticsCache->> StatisticsCache: cache UNKNOWN for the column else no exception: - StatisticCache->> StatisticCache: cache normal stats + StatisticsCache->> StatisticsCache: cache normal stats end end @@ -116,7 +116,7 @@ end # Test -The regression tests now mainly cover the following. +The regression tests now mainly cover the following. - Analyze stats: mainly to verify the `ANALYZE` statement and its related characteristics, because some functions are affected by other factors (such as system metadata reporting time), may show instability, so this part is placed in p1. - Manage stats: mainly used to verify the injection, deletion, display and other related operations of statistical information. @@ -132,21 +132,21 @@ p0 tests: p1 tests: 1. Universal analysis -2. Sampled analysis -3. Incremental analysis -4. Automatic analysis +2. Sampled analysis +3. Incremental analysis +4. Automatic analysis 5. Periodic analysis ## Manage stats p0 tests: -1. Alter table stats -2. Show table stats -3. Alter column stats -4. Show column stats -5. Show column histogram -6. Drop column stats +1. Alter table stats +2. Show table stats +3. Alter column stats +4. Show column stats +5. Show column histogram +6. Drop column stats 7. Drop expired stats For the modification of the statistics module, all the above cases should be guaranteed to pass! @@ -155,4 +155,4 @@ For the modification of the statistics module, all the above cases should be gua 20230508: 1. Add table level statistics, support `SHOW TABLE STATS` statement to show table level statistics. -2. Implement automatically analyze statistics, support `ANALYZE... WITH AUTO ...` statement to automatically analyze statistics. \ No newline at end of file +2. Implement automatically analyze statistics, support `ANALYZE... WITH AUTO ...` statement to automatically analyze statistics. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org