This is an automated email from the ASF dual-hosted git repository.

jiafengzheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new db83f37256 [doc](typo)Add doc sidebars (#11749) db83f37256 is described below commit db83f37256683b3ad077294aa7aedf7e73c5437f Author: jiafeng.zhang <zhang...@gmail.com> AuthorDate: Sat Aug 13 20:46:52 2022 +0800 [doc](typo)Add doc sidebars (#11749) add doc sidebars --- docs/dev.json | 282 ++++++ .../design/Flink-doris-connector-Design.md | 259 ------ .../design/spark_load.md} | 6 +- docs/en/docs/advanced/alter-table/replace-table.md | 72 ++ .../join-optimization/doris-join-optimization.md | 107 ++- docs/en/docs/data-table/data-model.md | 2 +- docs/en/docs/ecosystem/cloudcanal.md | 4 +- .../ecosystem/doris-manager/cluster-managenent.md | 26 +- docs/en/docs/ecosystem/flink-doris-connector.md | 30 +- docs/sidebars.json | 972 +++++++++++++++++++++ docs/sidebarsCommunity.json | 68 ++ .../design/flink_doris_connector_design.md | 272 ------ .../docs/advanced/alter-table/replace-table.md | 71 ++ docs/zh-CN/docs/summary/system-architecture.md | 29 - 14 files changed, 1542 insertions(+), 658 deletions(-) diff --git a/docs/dev.json b/docs/dev.json new file mode 100644 index 0000000000..730292f519 --- /dev/null +++ b/docs/dev.json @@ -0,0 +1,282 @@ +{ + "version.label": { + "message": "1.1", + "description": "The label for version current" + }, + "sidebar.docs.category.Getting Started": { + "message": "快速开始", + "description": "The label for category Getting Started in sidebar docs" + }, + "sidebar.docs.category.Doris Introduction": { + "message": "Doris 介绍", + "description": "The label for category Doris Architecture in sidebar docs" + }, + "sidebar.docs.category.Install And Deploy": { + "message": "安装部署", + "description": "The label for category Install And Deploy in sidebar docs" + }, + "sidebar.docs.category.Compile": { + "message": "源码编译", + "description": "The label for category Compile in sidebar docs" + }, + "sidebar.docs.category.Table Design": { + "message": "数据表设计", + "description": "The label for category Table Design in sidebar docs" + }, + "sidebar.docs.category.Index": { + "message": "索引", + "description": "The label for category Index in sidebar docs" + }, + "sidebar.docs.category.Data Operation": { + "message": "数据操作", + "description": "The label for category Data Operation in sidebar docs" + }, + "sidebar.docs.category.Import": { + "message": "数据导入", + "description": "The label for category Import in sidebar docs" + }, + "sidebar.docs.category.Import Scenes": { + "message": "按场景导入", + "description": "The label for category Import Scenes in sidebar docs" + }, + "sidebar.docs.category.Import Way": { + "message": "按方式导入", + "description": "The label for category Import Way in sidebar docs" + }, + "sidebar.docs.category.Export": { + "message": "导出", + "description": "The label for category Export in sidebar docs" + }, + "sidebar.docs.category.Update and Delete": { + "message": "数据更新及删除", + "description": "The label for category Update and Delete in sidebar docs" + }, + "sidebar.docs.category.Advanced Usage": { + "message": "进阶使用", + "description": "The label for category Advanced Usage in sidebar docs" + }, + "sidebar.docs.category.Alter Table": { + "message": "表结构变更", + "description": "The label for category Alter Table in sidebar docs" + }, + "sidebar.docs.category.Doris Partition": { + "message": "Doris 表分区", + "description": "The label for category Doris Partition in sidebar docs" + }, + "sidebar.docs.category.Join Optimization": { + "message": "Join 优化", + "description": "The label for category Join 
Optimization in sidebar docs" + }, + "sidebar.docs.category.Date Cache": { + "message": "数据缓存", + "description": "The label for category Date Cache in sidebar docs" + }, + "sidebar.docs.category.Best Practice": { + "message": "最佳实践", + "description": "The label for category Best Practice in sidebar docs" + }, + "sidebar.docs.category.Ecosystem": { + "message": "生态扩展", + "description": "The label for category Ecosystem in sidebar docs" + }, + "sidebar.docs.category.Expansion table": { + "message": "扩展表", + "description": "The label for category Expansion table in sidebar docs" + }, + "sidebar.docs.category.Doris Manager": { + "message": "Doris Manager", + "description": "The label for category Doris Manager in sidebar docs" + }, + "sidebar.docs.category.SeaTunnel": { + "message": "SeaTunnel", + "description": "The label for category SeaTunnel in sidebar docs" + }, + "sidebar.docs.category.UDFA": { + "message": "UDFA", + "description": "The label for category UDFA in sidebar docs" + }, + "sidebar.docs.category.UDF": { + "message": "自定义函数", + "description": "The label for category UDF in sidebar docs" + }, + "sidebar.docs.category.SQL Manual": { + "message": "SQL 手册", + "description": "The label for category SQL Manual in sidebar docs" + }, + "sidebar.docs.category.SQL Functions": { + "message": "SQL 函数", + "description": "The label for category SQL Functions in sidebar docs" + }, + "sidebar.docs.category.Array Functions": { + "message": "数组函数", + "description": "The label for category Array Functions in sidebar docs" + }, + "sidebar.docs.category.Date Functions": { + "message": "日期函数", + "description": "The label for category Date Functions in sidebar docs" + }, + "sidebar.docs.category.GIS Functions": { + "message": "地理位置函数", + "description": "The label for category GIS Functions in sidebar docs" + }, + "sidebar.docs.category.String Functions": { + "message": "字符串函数", + "description": "The label for category String Functions in sidebar docs" + }, + "sidebar.docs.category.Fuzzy Match": { + "message": "模糊匹配", + "description": "The label for category Fuzzy Match in sidebar docs" + }, + "sidebar.docs.category.Regular Match": { + "message": "正则匹配", + "description": "The label for category Regular Match in sidebar docs" + }, + "sidebar.docs.category.Aggregate Functions": { + "message": "聚合函数", + "description": "The label for category Aggregate Functions in sidebar docs" + }, + "sidebar.docs.category.Bitmap Functions": { + "message": "Bitmap 函数", + "description": "The label for category Bitmap Functions in sidebar docs" + }, + "sidebar.docs.category.Bitwise Functions": { + "message": "Bitwise 函数", + "description": "The label for category Bitwise Functions in sidebar docs" + }, + "sidebar.docs.category.Condition Functions": { + "message": "条件函数", + "description": "The label for category Condition Functions in sidebar docs" + }, + "sidebar.docs.category.JSON Functions": { + "message": "JSON 函数", + "description": "The label for category JSON Functions in sidebar docs" + }, + "sidebar.docs.category.Hash Functions": { + "message": "Hash 函数", + "description": "The label for category Hash Functions in sidebar docs" + }, + "sidebar.docs.category.Math Functions": { + "message": "数学函数", + "description": "The label for category Math Functions in sidebar docs" + }, + "sidebar.docs.category.Encryption Functions": { + "message": "加密和信息摘要函数", + "description": "The label for category Encryption Functions in sidebar docs" + }, + "sidebar.docs.category.Table Functions": { + "message": "表函数", + "description": "The 
label for category Table Functions in sidebar docs" + }, + "sidebar.docs.category.Analytic(Window) Functions": { + "message": "分析(窗口)函数", + "description": "The label for category Analytic(Window) Functions in sidebar docs" + }, + "sidebar.docs.category.SQL Reference": { + "message": "SQL 手册", + "description": "The label for category SQL Reference in sidebar docs" + }, + "sidebar.docs.category.Account Management": { + "message": "账户管理", + "description": "The label for category Account Management in sidebar docs" + }, + "sidebar.docs.category.Cluster management": { + "message": "集群管理", + "description": "The label for category Cluster management in sidebar docs" + }, + "sidebar.docs.category.DDL": { + "message": "DDL", + "description": "The label for category DDL in sidebar docs" + }, + "sidebar.docs.category.Alter": { + "message": "Alter", + "description": "The label for category Alter in sidebar docs" + }, + "sidebar.docs.category.Backup and Restore": { + "message": "备份及恢复", + "description": "The label for category Backup and Restore in sidebar docs" + }, + "sidebar.docs.category.Create": { + "message": "Create", + "description": "The label for category Create in sidebar docs" + }, + "sidebar.docs.category.Drop": { + "message": "Drop", + "description": "The label for category Drop in sidebar docs" + }, + "sidebar.docs.category.DML": { + "message": "DML", + "description": "The label for category DML in sidebar docs" + }, + "sidebar.docs.category.Load": { + "message": "Load", + "description": "The label for category Load in sidebar docs" + }, + "sidebar.docs.category.Manipulation": { + "message": "操作", + "description": "The label for category Manipulation in sidebar docs" + }, + "sidebar.docs.category.Database Administration": { + "message": "数据库管理", + "description": "The label for category Database Administration in sidebar docs" + }, + "sidebar.docs.category.Show": { + "message": "Show", + "description": "The label for category Show in sidebar docs" + }, + "sidebar.docs.category.Data Types": { + "message": "数据类型", + "description": "The label for category Data Types in sidebar docs" + }, + "sidebar.docs.category.Utility": { + "message": "辅助命令", + "description": "The label for category Utility in sidebar docs" + }, + "sidebar.docs.category.Admin Manual": { + "message": "管理手册", + "description": "The label for category Admin Manual in sidebar docs" + }, + "sidebar.docs.category.cluster management": { + "message": "集群管理", + "description": "The label for category cluster management in sidebar docs" + }, + "sidebar.docs.category.Data Admin": { + "message": "数据管理", + "description": "The label for category Data Admin in sidebar docs" + }, + "sidebar.docs.category.Maintenance and Monitor": { + "message": "运维监控", + "description": "The label for category Maintenance and Monitor in sidebar docs" + }, + "sidebar.docs.category.Monitor Metrics": { + "message": "监控指标", + "description": "The label for category Monitor Metrics in sidebar docs" + }, + "sidebar.docs.category.Config": { + "message": "配置管理", + "description": "The label for category Config in sidebar docs" + }, + "sidebar.docs.category.User Privilege and Ldap": { + "message": "用户权限及认证", + "description": "The label for category User Privilege and Ldap in sidebar docs" + }, + "sidebar.docs.category.HTTP API": { + "message": "HTTP API", + "description": "The label for category HTTP API in sidebar docs" + }, + "sidebar.docs.category.FE": { + "message": "FE", + "description": "The label for category FE in sidebar docs" + }, + 
"sidebar.docs.category.MANAGER": { + "message": "MANAGER", + "description": "The label for category MANAGER in sidebar docs" + }, + "sidebar.docs.category.FAQ": { + "message": "常见问题", + "description": "The label for category FAQ in sidebar docs" + }, + "sidebar.docs.category.Benchmark": { + "message": "性能测试", + "description": "The label for category Benchmark in sidebar docs" + } +} diff --git a/docs/en/community/design/Flink-doris-connector-Design.md b/docs/en/community/design/Flink-doris-connector-Design.md deleted file mode 100644 index 1eb6b336e1..0000000000 --- a/docs/en/community/design/Flink-doris-connector-Design.md +++ /dev/null @@ -1,259 +0,0 @@ ---- -{ - "title": "Flink doris connector Design", - "language": "en" -} - - ---- - -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under - -# Flink doris connector Design - - - -First of all, thanks to the author of the community Spark Doris Connector - -From the perspective of Doris, by introducing its data into Flink, Flink can use a series of rich ecological products, which broadens the imagination of the product and also makes it possible to query Doris and other data sources jointly. - -Starting from our business architecture and business needs, we chose Flink as part of our architecture, the ETL and real-time computing framework for data. The community currently supports Spark doris connector, so we designed and developed Flink doris Connector with reference to Spark doris connector. - -##Technical Choice - -When the model was originally selected, it was the same as the Spark Doris connector, so we started to consider the JDBC method, but, as described in the Spark doris connector article, this method has advantages, but the disadvantages are more obvious. Later, we read and tested the Spark code and decided to implement it on the shoulders of giants (note: copy the code and modify it directly). - -The following content is from the Spark Doris Connector blog, directly copied - -``` -Therefore, we developed a new data source Spark-Doris-Connector for Doris. Under this scheme, Doris can publish Doris data and distribute it to Spark. The Spark driver accesses Doris's FE to obtain the Doris table architecture and basic data distribution. After that, according to this data distribution, the data query task is reasonably allocated to the executors. Finally, Spark's execution program accesses different BEs for querying. Greatly improve query efficiency -``` - -## 1. Instructions - -Compile and generate doris-flink-1.0.0-SNAPSHOT.jar in the extension/flink-doris-connector/ directory of the Doris code base, add this jar package to the ClausPath of flink, and then you can use Flink-on -Doris function - -## 2. 
how to use - -Compile and generate doris-flink-1.0.0-SNAPSHOT.jar in the extension/flink-doris-connector/ directory of the Doris code library, add this jar package to the ClassPath of flink, and then use the Flink-on-Doris function - -#### 2.1 SQL way - -Support function: - -1. Supports reading data in Doris data warehouse tables through Flink SQL to Flink for calculations -2. Support inserting data into the corresponding table of the data warehouse through Flink SQL. The back-end implementation is to communicate directly with BE through Stream Load to complete the data insertion operation -3. You can use Flink to associate non-Doris external data source tables for association analysis - -example: - - - -```java - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - final StreamTableEnvironment tEnv = StreamTableEnvironment.create(env); - tEnv.executeSql( - "CREATE TABLE test_aggregation01 (" + - "user_id STRING," + - "user_city STRING," + - "age INT," + - "last_visit_date STRING" + - ") " + - "WITH (\n" + - " 'connector' = 'doris',\n" + - " 'fenodes' = 'doris01:8030',\n" + - " 'table.identifier' = 'demo.test_aggregation',\n" + - " 'username' = 'root',\n" + - " 'password' = ''\n" + - ")"); - tEnv.executeSql( - "CREATE TABLE test_aggregation02 (" + - "user_id STRING," + - "user_city STRING," + - "age INT," + - "last_visit_date STRING" + - ") " + - "WITH (\n" + - " 'connector' = 'doris',\n" + - " 'fenodes' = 'doris01:8030',\n" + - " 'table.identifier' = 'demo.test_aggregation_01',\n" + - " 'username' = 'root',\n" + - " 'password' = ''\n" + - ")"); - - tEnv.executeSql("INSERT INTO test_aggregation02 select * from test_aggregation01"); - tEnv.executeSql("select count(1) from test_aggregation01"); -``` - -#### 2.2 DataStream way: - -```java -DorisOptions.Builder options = DorisOptions.builder() - .setFenodes("$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT") - .setUsername("$YOUR_DORIS_USERNAME") - .setPassword("$YOUR_DORIS_PASSWORD") - .setTableIdentifier("$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME"); -env.addSource(new DorisSourceFunction<>(options.build(),new SimpleListDeserializationSchema())).print(); -``` - -## 3. Applicable scene - -![1616987965864](/images/Flink-doris-connector.png) - -#### 3.1. Use Flink to perform joint analysis on data in Doris and other data sources - -Many business departments place their data on different storage systems, such as some online analysis and report data in Doris, some structured retrieval data in Elasticsearch, and some data used for transaction processing in MySQL, and so on. It is often necessary to analyze the business across multiple storage sources. After connecting Flink and Doris through the Flink Doris connector, companies can directly use Flink to perform joint query calculations on the data in Doris and multipl [...] - -#### 3.2 Real-time data access - -Before Flink Doris Connector: For business irregular data, it is usually necessary to perform standardized processing on messages, and write null value filtering into new topics, and then start regular loading to write Doris. - -![1616988281677](/images/Flink-doris-connector1.png) - -After Flink Doris Connector: flink reads kafka and writes doris directly. 
- -![1616988514873](/images/Flink-doris-connector2.png) - -## 4.Technical realization - -### 4.1 Architecture diagram - -![1616997396610](/images/Flink-doris-connector-architecture.png) - -### 4.2 Doris provides more external capabilities - -#### 4.2.1 Doris FE - -The interface for obtaining metadata information of internal tables, single-table query planning and some statistical information has been opened to the outside world. - -All Rest API interfaces require HttpBasic authentication. The user name and password are the user name and password for logging in to the database. Pay attention to the correct assignment of permissions. - -``` -// Get table schema meta information -GET api/{database}/{table}/_schema - -// Get the query plan template for a single table -POST api/{database}/{table}/_query_plan -{ -"sql": "select k1, k2 from {database}.{table}" -} - -// Get the table size -GET api/{database}/{table}/_count -``` - -#### 4.2.2 Doris BE - -Through the Thrift protocol, data filtering, scanning and cropping capabilities are directly provided to the outside world. - -``` -service TDorisExternalService { - // Initialize the query executor -TScanOpenResult open_scanner(1: TScanOpenParams params); - -// Streaming batch to get data, Apache Arrow data format - TScanBatchResult get_next(1: TScanNextBatchParams params); - -// end scan - TScanCloseResult close_scanner(1: TScanCloseParams params); -} -``` - -For definitions of Thrift related structures, please refer to: - -https://github.com/apache/incubator-doris/blob/master/gensrc/thrift/DorisExternalService.thrift - -### 4.3 Implement DataStream - -Inherit org.apache.flink.streaming.api.functions.source.RichSourceFunction and customize DorisSourceFunction. During initialization, get the execution plan of the related table and get the corresponding partition. - -Rewrite the run method to read data from the partition in a loop. - -``` -public void run(SourceContext sourceContext){ - //Cycle through the partitions - for(PartitionDefinition partitions : dorisPartitions){ - scalaValueReader = new ScalaValueReader(partitions, settings); - while (scalaValueReader.hasNext()){ - Object next = scalaValueReader.next(); - sourceContext.collect(next); - } - } -} -``` - -### 4.4 Implement Flink SQL on Doris - -Refer to [Flink Custom Source&Sink](https://ci.apache.org/projects/flink/flink-docs-stable/zh/dev/table/sourceSinks.md) and Flink-jdbc-connector to implement the following As a result, Flink SQL can be used to directly manipulate Doris tables, including reading and writing. - -#### 4.4.1 Implementation details - -1. Realize DynamicTableSourceFactory and DynamicTableSinkFactory register doris connector -2. Customize DynamicTableSource and DynamicTableSink to generate logical plans -3. After DorisRowDataInputFormat and DorisDynamicOutputFormat obtain the logical plan, start execution - -![1616747472136](/images/table_connectors.svg) - -The most important implementation is DorisRowDataInputFormat and DorisDynamicOutputFormat customized based on RichInputFormat and RichOutputFormat. - -In DorisRowDataInputFormat, the obtained dorisPartitions are divided into multiple shards in createInputSplits for parallel computing. 
- -```java -public DorisTableInputSplit[] createInputSplits(int minNumSplits) { - List<DorisTableInputSplit> dorisSplits = new ArrayList<>(); - int splitNum = 0; - for (PartitionDefinition partition : dorisPartitions) { - dorisSplits.add(new DorisTableInputSplit(splitNum++,partition)); - } - return dorisSplits.toArray(new DorisTableInputSplit[0]); -} - -public RowData nextRecord(RowData reuse) { - if (!hasNext) { - //After reading the data, return null - return null; - } - List next = (List)scalaValueReader.next(); - GenericRowData genericRowData = new GenericRowData(next.size()); - for(int i =0;i<next.size();i++){ - genericRowData.setField(i, next.get(i)); - } - //Determine if there is still data - hasNext = scalaValueReader.hasNext(); - return genericRowData; -} -``` - -In DorisRowDataOutputFormat, write data to doris through streamload. Refer to org.apache.doris.plugin.audit.DorisStreamLoader for streamload program - -```java -public void writeRecord(RowData row) throws IOException { - //streamload Default delimiter \t - StringJoiner value = new StringJoiner("\t"); - GenericRowData rowData = (GenericRowData) row; - for(int i = 0; i < row.getArity(); ++i) { - value.add(rowData.getField(i).toString()); - } - //streamload write data - DorisStreamLoad.LoadResponse loadResponse = dorisStreamLoad.loadBatch(value.toString()); - System.out.println(loadResponse); -} -``` - diff --git a/docs/en/docs/summary/system-architecture.md b/docs/en/community/design/spark_load.md similarity index 91% rename from docs/en/docs/summary/system-architecture.md rename to docs/en/community/design/spark_load.md index f7178ee8de..9e630f4393 100644 --- a/docs/en/docs/summary/system-architecture.md +++ b/docs/en/community/design/spark_load.md @@ -1,6 +1,6 @@ --- { - "title": "System Architecture", + "title": "Spark Load", "language": "en" } --- @@ -24,6 +24,6 @@ specific language governing permissions and limitations under the License. --> -# Doris system architecture +# Spark load -(TODO) \ No newline at end of file +TODO diff --git a/docs/en/docs/advanced/alter-table/replace-table.md b/docs/en/docs/advanced/alter-table/replace-table.md new file mode 100644 index 0000000000..13ad8d5cc4 --- /dev/null +++ b/docs/en/docs/advanced/alter-table/replace-table.md @@ -0,0 +1,72 @@ +--- +{ + "title": "Replace Table", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# Replace Table + +In version 0.14, Doris supports atomic replacement of two tables. +This operation only applies to OLAP tables. + +For partition level replacement operations, please refer to [Temporary Partition Document](../partition/table-temp-partition.md) + +## Syntax + +``` +ALTER TABLE [db.]tbl1 REPLACE WITH tbl2 +[PROPERTIES('swap' = 'true')]; +``` + +Replace table `tbl1` with table `tbl2`. 
+ +If the `swap` parameter is `true`, after replacement, the data in the table named `tbl1` is the data in the original `tbl2` table. The data in the table named `tbl2` is the data in the original table `tbl1`. That is, the data of the two tables are interchanged. + +If the `swap` parameter is `false`, after replacement, the data in the table named `tbl1` is the data in the original `tbl2` table. The table named `tbl2` is dropped. + +## Principle + +The replacement table function actually turns the following set of operations into an atomic operation. + +Suppose you want to replace table A with table B, and `swap` is `true`, the operation is as follows: + +1. Rename table B to table A. +2. Rename table A to table B. + +If `swap` is `false`, the operation is as follows: + +1. Drop table A. +2. Rename table B to table A. + +## Notice + +1. The `swap` parameter defaults to `true`. That is, the replacement table operation is equivalent to the exchange of two table data. +2. If the `swap` parameter is set to `false`, the replaced table (table A) will be dropped and cannot be recovered. +3. The replacement operation can only occur between two OLAP tables, and the table structure of the two tables is not checked for consistency. +4. The replacement operation will not change the original permission settings. Because the permission check is based on the table name. + +## Best Practices + +1. Atomic Overwrite Operation + + In some cases, the user wants to be able to rewrite the data of a certain table, but if it is dropped and then imported, there will be a period of time in which the data cannot be viewed. At this time, the user can first use the `CREATE TABLE LIKE` statement to create a new table with the same structure, import the new data into the new table, and replace the old table atomically through the replacement operation to achieve the goal. For partition level atomic overwrite operation, pl [...] diff --git a/docs/en/docs/advanced/join-optimization/doris-join-optimization.md b/docs/en/docs/advanced/join-optimization/doris-join-optimization.md index 188842179a..057975f7a7 100644 --- a/docs/en/docs/advanced/join-optimization/doris-join-optimization.md +++ b/docs/en/docs/advanced/join-optimization/doris-join-optimization.md @@ -1,13 +1,8 @@ --- -{ - "title": "Doris Join optimization principle", - "language": "en" -} - - +{ 'title': 'Doris Join optimization principle', 'language': 'en' } --- -<!-- +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -30,63 +25,61 @@ under the License. Doris supports two physical operators, one is **Hash Join**, and the other is **Nest Loop Join**. -- Hash Join: Create a hash table on the right table based on the equivalent join column, and the left table uses the hash table to perform join calculations in a streaming manner. Its limitation is that it can only be applied to equivalent joins. -- Nest Loop Join: With two for loops, it is very intuitive. Then it is applicable to unequal-valued joins, such as: greater than or less than or the need to find a Cartesian product. It is a general join operator, but has poor performance. +- Hash Join: Create a hash table on the right table based on the equivalent join column, and the left table uses the hash table to perform join calculations in a streaming manner. Its limitation is that it can only be applied to equivalent joins. +- Nest Loop Join: With two for loops, it is very intuitive. 
Then it is applicable to unequal-valued joins, such as: greater than or less than or the need to find a Cartesian product. It is a general join operator, but has poor performance. As a distributed MPP database, data shuffle needs to be performed during the Join process. Data needs to be split and scheduled to ensure that the final Join result is correct. As a simple example, assume that the relationship S and R are joined, and N represents the number of nodes participating in the join calculation; T represents the number of tuples in the relationship. - - ## Doris Shuffle way 1. Doris supports 4 Shuffle methods - 1. BroadCast Join + 1. BroadCast Join - It requires the full data of the right table to be sent to the left table, that is, each node participating in Join has the full data of the right table, that is, T(R). + It requires the full data of the right table to be sent to the left table, that is, each node participating in Join has the full data of the right table, that is, T(R). - Its applicable scenarios are more general, and it can support Hash Join and Nest loop Join at the same time, and its network overhead is N * T(R). + Its applicable scenarios are more general, and it can support Hash Join and Nest loop Join at the same time, and its network overhead is N \* T(R). - ![image-20220523152004731](/images/join/image-20220523152004731.png) + ![image-20220523152004731](/images/join/image-20220523152004731.png) - The data in the left table is not moved, and the data in the right table is sent to the scanning node of the data in the left table. + The data in the left table is not moved, and the data in the right table is sent to the scanning node of the data in the left table. 2. Shuffle Join - When Hash Join is performed, the corresponding Hash value can be calculated through the Join column, and Hash bucketing can be performed. + When Hash Join is performed, the corresponding Hash value can be calculated through the Join column, and Hash bucketing can be performed. - Its network overhead is: T(R) + T(N), but it can only support Hash Join, because it also calculates buckets according to the conditions of Join. + Its network overhead is: T(R) + T(N), but it can only support Hash Join, because it also calculates buckets according to the conditions of Join. - ![image-20220523151902368](/images/join/image-20220523151902368.png) + ![image-20220523151902368](/images/join/image-20220523151902368.png) - The left and right table data are sent to different partition nodes according to the partition, and the calculated demerits are sent. + The left and right table data are sent to different partition nodes according to the partition, and the calculated demerits are sent. 3. Bucket Shuffle Join - Doris's table data itself is bucketed by Hash calculation, so you can use the properties of the bucketed columns of the table itself to shuffle the Join data. If two tables need to be joined, and the Join column is the bucket column of the left table, then the data in the left table can actually be calculated by sending the data into the buckets of the left table without moving the data in the right table. + Doris's table data itself is bucketed by Hash calculation, so you can use the properties of the bucketed columns of the table itself to shuffle the Join data. 
If two tables need to be joined, and the Join column is the bucket column of the left table, then the data in the left table can actually be calculated by sending the data into the buckets of the left table without moving the data in the right table. - Its network overhead is: T(R) is equivalent to only Shuffle the data in the right table. + Its network overhead is: T(R) is equivalent to only Shuffle the data in the right table. - ![image-20220523151653562](/images/join/image-20220523151653562.png) + ![image-20220523151653562](/images/join/image-20220523151653562.png) - The data in the left table does not move, and the data in the right table is sent to the node that scans the table in the left table according to the result of the partition calculation. + The data in the left table does not move, and the data in the right table is sent to the node that scans the table in the left table according to the result of the partition calculation. -4. Colocation +4. Colocation - It is similar to Bucket Shuffle Join, which means that the data has been shuffled according to the preset Join column scenario when data is imported. Then the join calculation can be performed directly without considering the Shuffle problem of the data during the actual query. + It is similar to Bucket Shuffle Join, which means that the data has been shuffled according to the preset Join column scenario when data is imported. Then the join calculation can be performed directly without considering the Shuffle problem of the data during the actual query. - ![image-20220523151619754](/images/join/image-20220523151619754.png) + ![image-20220523151619754](/images/join/image-20220523151619754.png) - The data has been pre-partitioned, and the Join calculation is performed directly locally + The data has been pre-partitioned, and the Join calculation is performed directly locally ### Comparison of four Shuffle methods -| Shuffle Mode | Network Overhead | Physical Operators | Applicable Scenarios | -| -------------- | ------------- | ------------ ---- | --------------------------------------------- --------------- | -| BroadCast | N * T(R) | Hash Join / Nest Loop Join | Universal | -| Shuffle | T(S) + T(R) | Hash Join | General | -| Bucket Shuffle | T(R) | Hash Join | There are distributed columns in the left table in the join condition, and the left table is executed as a single partition | -| Colocate | 0 | Hash Join | There are distributed columns in the left table in the join condition, and the left and right tables belong to the same Colocate Group | +| Shuffle Mode | Network Overhead | Physical Operators | Applicable Scenarios | +| -------------- | ---------------- | -------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- | +| BroadCast | N \* T(R) | Hash Join / Nest Loop Join | Universal | +| Shuffle | T(S) + T(R) | Hash Join | General | +| Bucket Shuffle | T(R) | Hash Join | There are distributed columns in the left table in the join condition, and the left table is executed as a single partition | +| Colocate | 0 | Hash Join | There are distributed columns in the left table in the join condition, and the left and right tables belong to the same Colocate Group | N : The number of Instances participating in the Join calculation @@ -102,14 +95,14 @@ If the left table is a large table and the right table is a small table, then us Currently Doris supports three types of RuntimeFilter -- One is IN-IN, which is well 
understood, and pushes a hashset down to the data scanning node. -- The second is BloomFilter, which uses the data of the hash table to construct a BloomFilter, and then pushes the BloomFilter down to the scanning node that queries the data. . -- The last one is MinMax, which is a Range range. After the Range range is determined by the data in the right table, it is pushed down to the data scanning node. +- One is IN-IN, which is well understood, and pushes a hashset down to the data scanning node. +- The second is BloomFilter, which uses the data of the hash table to construct a BloomFilter, and then pushes the BloomFilter down to the scanning node that queries the data. . +- The last one is MinMax, which is a Range range. After the Range range is determined by the data in the right table, it is pushed down to the data scanning node. There are two requirements for the applicable scenarios of Runtime Filter: -- The first requirement is that the right table is large and the left table is small, because building a Runtime Filter needs to bear the computational cost, including some memory overhead. -- The second requirement is that there are few results from the join of the left and right tables, indicating that this join can filter out most of the data in the left table. +- The first requirement is that the right table is large and the left table is small, because building a Runtime Filter needs to bear the computational cost, including some memory overhead. +- The second requirement is that there are few results from the join of the left and right tables, indicating that this join can filter out most of the data in the left table. When the above two conditions are met, turning on the Runtime Filter can achieve better results @@ -119,10 +112,10 @@ Delayed materialization is simply like this: if you need to scan three columns A ### Runtime Filter Type -- Doris provides three different Runtime Filter types: - - The advantage of **IN** is that the effect filtering effect is obvious and fast. Its shortcomings are: First, it only applies to BroadCast. Second, when the right table exceeds a certain amount of data, it will fail. The current Doris configuration is 1024, that is, if the right table is larger than 1024, the Runtime Filter of IN will directly failed. - - The advantage of **MinMax** is that the overhead is relatively small. Its disadvantage is that it has a relatively good effect on numeric columns, but basically no effect on non-numeric columns. - - The feature of **Bloom Filter** is that it is universal, suitable for various types, and the effect is better. The disadvantage is that its configuration is more complicated and the calculation is high. +- Doris provides three different Runtime Filter types: + - The advantage of **IN** is that the effect filtering effect is obvious and fast. Its shortcomings are: First, it only applies to BroadCast. Second, when the right table exceeds a certain amount of data, it will fail. The current Doris configuration is 1024, that is, if the right table is larger than 1024, the Runtime Filter of IN will directly failed. + - The advantage of **MinMax** is that the overhead is relatively small. Its disadvantage is that it has a relatively good effect on numeric columns, but basically no effect on non-numeric columns. + - The feature of **Bloom Filter** is that it is universal, suitable for various types, and the effect is better. The disadvantage is that its configuration is more complicated and the calculation is high. 
## Join Reader @@ -132,19 +125,19 @@ Next, look at the picture on the right and adjust the order of Join. Join the a ![image-20220523152639123](/images/join/image-20220523152639123.png) -- Doris currently supports the rule-based Join Reorder algorithm. Its logic is: - - Make joins with large tables and small tables as much as possible, and the intermediate results it generates are as small as possible. - - Put the conditional join table forward, that is to say, try to filter the conditional join table - - Hash Join has higher priority than Nest Loop Join, because Hash Join itself is much faster than Nest Loop Join. +- Doris currently supports the rule-based Join Reorder algorithm. Its logic is: + - Make joins with large tables and small tables as much as possible, and the intermediate results it generates are as small as possible. + - Put the conditional join table forward, that is to say, try to filter the conditional join table + - Hash Join has higher priority than Nest Loop Join, because Hash Join itself is much faster than Nest Loop Join. ## Doris Join optimization method Doris Join tuning method: -- Use the Profile provided by Doris itself to locate the bottleneck of the query. Profile records various information in Doris' entire query, which is first-hand information for performance tuning. . -- Understand the Join mechanism of Doris, which is also the content shared with you in the second part. Only by knowing why and understanding its mechanism can we analyze why it is slow. -- Use Session variables to change some behaviors of Join, so as to realize the tuning of Join. -- Check the Query Plan to analyze whether this tuning is effective. +- Use the Profile provided by Doris itself to locate the bottleneck of the query. Profile records various information in Doris' entire query, which is first-hand information for performance tuning. . +- Understand the Join mechanism of Doris, which is also the content shared with you in the second part. Only by knowing why and understanding its mechanism can we analyze why it is slow. +- Use Session variables to change some behaviors of Join, so as to realize the tuning of Join. +- Check the Query Plan to analyze whether this tuning is effective. The above 4 steps basically complete a standard Join tuning process, and then it is to actually query and verify it to see what the effect is. @@ -203,7 +196,7 @@ where This Join query is very simple, a simple join of left and right tables. Of course, there are some filter conditions on it. When I opened the Profile, I found that the entire query Hash Join was executed for more than three minutes. It is a BroadCast Join, and its right table has 200 million entries, while the left table has only 700,000. In this case, it is unreasonable to choose Broadcast Join, which is equivalent to making a Hash Table of 200 million records, and then traversing the Ha [...] -![image-20220523154712519](/images/image-20220523154712519.png) +![image-20220523154712519](/images/join/image-20220523154712519.png) Why is there an unreasonable Join order? In fact, the left table is a large table with a level of 1 billion records. Two filter conditions are added to it. After adding these two filter conditions, there are 700,000 records of 1 billion records. But Doris currently does not have a good framework for collecting statistics, so it does not know what the filtering rate of this filter condition is. Therefore, when the join order is arranged, the wrong left and right table order of the join is [...] 
@@ -215,8 +208,8 @@ The following figure is an SQL statement after the rewrite is completed. A Join Finally, we summarize four suggestions for optimization and tuning of Doris Join: -- The first point: When doing Join, try to select columns of the same type or simple type. If the same type is used, reduce its data cast, and the simple type itself joins the calculation quickly. -- The second point: try to choose the Key column for Join. The reason is also introduced in the Runtime Filter. The Key column can play a better effect on delayed materialization. -- The third point: Join between large tables, try to make it Co-location, because the network overhead between large tables is very large, if you need to do Shuffle, the cost is very high. -- Fourth point: Use Runtime Filter reasonably, which is very effective in scenarios with high join filtering rate. But it is not a panacea, but has certain side effects, so it needs to be switched according to the granularity of specific SQL. -- Finally: When it comes to multi-table Join, it is necessary to judge the rationality of Join. Try to ensure that the left table is a large table and the right table is a small table, and then Hash Join will be better than Nest Loop Join. If necessary, you can use SQL Rewrite to adjust the order of Join using Hint. +- The first point: When doing Join, try to select columns of the same type or simple type. If the same type is used, reduce its data cast, and the simple type itself joins the calculation quickly. +- The second point: try to choose the Key column for Join. The reason is also introduced in the Runtime Filter. The Key column can play a better effect on delayed materialization. +- The third point: Join between large tables, try to make it Co-location, because the network overhead between large tables is very large, if you need to do Shuffle, the cost is very high. +- Fourth point: Use Runtime Filter reasonably, which is very effective in scenarios with high join filtering rate. But it is not a panacea, but has certain side effects, so it needs to be switched according to the granularity of specific SQL. +- Finally: When it comes to multi-table Join, it is necessary to judge the rationality of Join. Try to ensure that the left table is a large table and the right table is a small table, and then Hash Join will be better than Nest Loop Join. If necessary, you can use SQL Rewrite to adjust the order of Join using Hint. diff --git a/docs/en/docs/data-table/data-model.md b/docs/en/docs/data-table/data-model.md index c1f2084084..8d80110323 100644 --- a/docs/en/docs/data-table/data-model.md +++ b/docs/en/docs/data-table/data-model.md @@ -1,6 +1,6 @@ --- { - "title": "Data Model, ROLLUP and Prefix Index", + "title": "Data Model", "language": "en" } --- diff --git a/docs/en/docs/ecosystem/cloudcanal.md b/docs/en/docs/ecosystem/cloudcanal.md index 9c587eca45..3fb7518057 100644 --- a/docs/en/docs/ecosystem/cloudcanal.md +++ b/docs/en/docs/ecosystem/cloudcanal.md @@ -1,7 +1,7 @@ --- { "title": "CloudCanal Data Import", - "language": "zh-CN" + "language": "en" } --- @@ -30,6 +30,6 @@ under the License. CloudCanal Community Edition is a free data migration and synchronization platform published by [ClouGence Company](https://www.clougence.com) that integrates structure migration, full data migration/check/correction, and incremental real-time synchronization. 
Product contains complete Its productization capabilities help enterprises break data silos, complete data integration and interoperability, and make better use of data. -![image.png](../../../.vuepress/public/images/cloudcanal/cloudcanal-1.jpg) +![image.png](/images/cloudcanal/cloudcanal-1.jpg) > There is no English version of this document, please switch to the Chinese > version. diff --git a/docs/en/docs/ecosystem/doris-manager/cluster-managenent.md b/docs/en/docs/ecosystem/doris-manager/cluster-managenent.md index 5315294005..138061d0dd 100644 --- a/docs/en/docs/ecosystem/doris-manager/cluster-managenent.md +++ b/docs/en/docs/ecosystem/doris-manager/cluster-managenent.md @@ -1,11 +1,8 @@ --- -{ - "title": "Cluster management", - "language": "en" -} +{ 'title': 'Cluster management', 'language': 'en' } --- -<!-- +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information @@ -28,9 +25,9 @@ under the License. The super administrator and space administrator can mainly perform the following operations under the cluster module: -- View cluster overview -- View node list -- Edit parameter configuration +- View cluster overview +- View node list +- Edit parameter configuration ## Cluster overview @@ -40,7 +37,7 @@ Cluster function, showing a cluster-based monitoring panel. On the home page, click "Cluster" in the navigation bar to enter the cluster function. -![](/images/doris-manager/iclustermanager-1.png) +![](/images/doris-manager/clustermanagenent-1.png) The operation and maintenance monitoring panel provides various performance monitoring indicators of the cluster for users to gain insight into the cluster status. Users can control the start and stop operations of the cluster through buttons in the upper right corner. @@ -53,17 +50,16 @@ Users can view disk usage through pie charts, and view the number of databases, Displays information about FE nodes, BE nodes, and brokers in the cluster. Provides fields including Node ID, Node Type, Host IP, and Node Status. -![](/images/doris-manager/iclustermanager-2.png) +![](/images/doris-manager/clustermanagenent-2.png) ## Parameter configuration Parameter configuration provides parameter name, parameter type, parameter value type, thermal effect and operation fields. 
-![](/images/doris-manager/iclustermanager-3.png) +![](/images/doris-manager/clustermanagenent-3.png) -- **Operation**: Click the "Edit" button, you can edit and modify the corresponding configuration value, you can choose the corresponding effective method; click the "View current value" button, you can view the current value corresponding to the host IP +- **Operation**: Click the "Edit" button, you can edit and modify the corresponding configuration value, you can choose the corresponding effective method; click the "View current value" button, you can view the current value corresponding to the host IP -![](/images/doris-manager/iclustermanager-4.png) - -![](/images/doris-manager/iclustermanager-5.png) +![](/images/doris-manager/clustermanagenent-4.png) +![](/images/doris-manager/clustermanagenent-5.png) diff --git a/docs/en/docs/ecosystem/flink-doris-connector.md b/docs/en/docs/ecosystem/flink-doris-connector.md index 1a504cdba1..b63362c4df 100644 --- a/docs/en/docs/ecosystem/flink-doris-connector.md +++ b/docs/en/docs/ecosystem/flink-doris-connector.md @@ -113,7 +113,10 @@ After successful compilation, the file `flink-doris-connector-1.14_2.12-1.0.0-SN **Remarks:** -1. Doris FE should enable http v2 in the configuration fe.conf, which is enabled by default after version 0.15 +1. Doris FE should be configured to enable http v2 in the configuration + + conf/fe.conf + ``` enable_http_server_v2 = true ``` @@ -148,18 +151,6 @@ Add flink-doris-connector and necessary Flink Maven dependencies <scope>provided</scope> </dependency> -<!-- Add log dependencies when debugging locally --> -<dependency> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-api</artifactId> - <version>${slf4j.version}</version> -</dependency> -<dependency> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-log4j12</artifactId> - <version>${slf4j.version}</version> -</dependency> - <!-- flink-doris-connector --> <dependency> <groupId>org.apache.doris</groupId> @@ -365,12 +356,12 @@ source.sinkTo(builder.build()); | doris.read.field | -- | N | List of column names in the Doris table, separated by commas | | doris.filter.query | -- | N | Filter expression of the query, which is transparently transmitted to Doris. Doris uses this expression to complete source-side data filtering. | | sink.label-prefix | -- | Y | The label prefix used by stream load imports. In the 2pc scenario, global uniqueness is required to ensure the EOS semantics of Flink. | -| sink.properties.* | -- | N | The stream load parameters.<br /> <br /> eg:<br /> `sink.properties.column_separator' = ','`<br /> <br /> Setting `'sink.properties.escape_delimiters' = 'true'` if you want to use a control char as a separator, so that such as '\\x01' will translate to binary 0x01<br /><br /> Support JSON format import, you need to enable both `'sink.properties.format' ='json'` and `'sink.properties.read_json_by_line' = 'true'` | +| sink.properties.* | -- | N | The stream load parameters.<br /> <br /> eg:<br /> sink.properties.column_separator' = ','<br /> <br /> Setting 'sink.properties.escape_delimiters' = 'true' if you want to use a control char as a separator, so that such as '\\x01' will translate to binary 0x01<br /><br />Support JSON format import, you need to enable both 'sink.properties.format' ='json' and 'sink.properties.strip_outer_array' ='true' | | sink.enable-delete | true | N | Whether to enable deletion. 
This option requires Doris table to enable batch delete function (0.15+ version is enabled by default), and only supports Uniq model.| | sink.enable-2pc | true | N | Whether to enable two-phase commit (2pc), the default is true, to ensure Exactly-Once semantics. For two-phase commit, please refer to [here](../data-operate/import/import-way/stream-load-manual.md). | | sink.max-retries | 1 | N | In the 2pc scenario, the number of retries after the commit phase fails. | | sink.buffer-size | 1048576(1MB) | N | Write data cache buffer size, in bytes. It is not recommended to modify, the default configuration is sufficient. | -| sink.buffer-count | 3 | N | The number of write data cache buffers, it is not recommended to modify, the default configuration is sufficient. | +| sink.buffer-count | 3 | N | The number of write data cache buffers, it is not recommended to modify, the default configuration is sufficient. @@ -450,7 +441,7 @@ The most suitable scenario for using Flink Doris Connector is to synchronize sou ### common problem -**1. Bitmap type write** +1. **Bitmap type write** ```sql CREATE TABLE bitmap_sink ( @@ -468,8 +459,7 @@ WITH ( 'sink.properties.columns' = 'dt,page,user_id,user_id=to_bitmap(user_id)' ) ```` +2. **errCode = 2, detailMessage = Label [label_0_1] has already been used, relate to txn [19650]** -**2. errCode = 2, detailMessage = Label [label_0_1] has already been used, relate to txn [19650]** - -In the Exactly-Once scenario, the Flink Job must be restarted from the latest Checkpoint/Savepoint, otherwise the above error will be reported. </br> -When Exactly-Once is not required, it can also be solved by turning off 2PC commits (`sink.enable-2pc=false`) or changing to a different `sink.label-prefix`. \ No newline at end of file +In the Exactly-Once scenario, the Flink Job must be restarted from the latest Checkpoint/Savepoint, otherwise the above error will be reported. +When Exactly-Once is not required, it can also be solved by turning off 2PC commits (sink.enable-2pc=false) or changing to a different sink.label-prefix. 
\ No newline at end of file diff --git a/docs/sidebars.json b/docs/sidebars.json new file mode 100644 index 0000000000..4646c6a1c2 --- /dev/null +++ b/docs/sidebars.json @@ -0,0 +1,972 @@ +{ + "docs": [ + { + "type": "category", + "label": "Getting Started", + "items": [ + "get-starting/get-starting" + ] + }, + { + "type": "category", + "label": "Doris Introduction", + "items": [ + "summary/basic-summary" + ] + }, + { + "type": "category", + "label": "Install And Deploy", + "items": [ + "install/install-deploy", + { + "type": "category", + "label": "Compile", + "items": [ + "install/source-install/compilation", + "install/source-install/compilation-with-ldb-toolchain", + "install/source-install/compilation-arm" + ] + } + ] + }, + { + "type": "category", + "label": "Table Design", + "items": [ + "data-table/data-model", + "data-table/data-partition", + "data-table/basic-usage", + "data-table/advance-usage", + "data-table/hit-the-rollup", + "data-table/best-practice", + { + "type": "category", + "label": "Index", + "items": [ + "data-table/index/bloomfilter", + "data-table/index/prefix-index", + "data-table/index/bitmap-index" + ] + } + ] + }, + { + "type": "category", + "label": "Data Operation", + "items": [ + { + "type": "category", + "label": "Import", + "items": [ + "data-operate/import/load-manual", + { + "type": "category", + "label": "Import Scenes", + "items": [ + "data-operate/import/import-scenes/local-file-load", + "data-operate/import/import-scenes/external-storage-load", + "data-operate/import/import-scenes/kafka-load", + "data-operate/import/import-scenes/external-table-load", + "data-operate/import/import-scenes/jdbc-load", + "data-operate/import/import-scenes/load-atomicity", + "data-operate/import/import-scenes/load-data-convert", + "data-operate/import/import-scenes/load-strict-mode" + ] + }, + { + "type": "category", + "label": "Import Way", + "items": [ + "data-operate/import/import-way/binlog-load-manual", + "data-operate/import/import-way/broker-load-manual", + "data-operate/import/import-way/routine-load-manual", + "data-operate/import/import-way/spark-load-manual", + "data-operate/import/import-way/stream-load-manual", + "data-operate/import/import-way/s3-load-manual", + "data-operate/import/import-way/insert-into-manual", + "data-operate/import/import-way/load-json-format" + ] + } + ] + }, + { + "type": "category", + "label": "Export", + "items": [ + "data-operate/export/export-manual", + "data-operate/export/outfile", + "data-operate/export/export_with_mysql_dump" + ] + }, + { + "type": "category", + "label": "Update and Delete", + "items": [ + "data-operate/update-delete/batch-delete-manual", + "data-operate/update-delete/update", + "data-operate/update-delete/delete-manual", + "data-operate/update-delete/sequence-column-manual" + ] + } + ] + }, + { + "type": "category", + "label": "Advanced Usage", + "items": [ + "advanced/materialized-view", + "advanced/vectorized-execution-engine", + "advanced/broker", + "advanced/resource", + "advanced/orthogonal-bitmap-manual", + "advanced/orthogonal-hll-manual", + "advanced/using-hll", + "advanced/variables", + "advanced/time-zone", + "advanced/small-file-mgr", + "advanced/sql-mode", + { + "type": "category", + "label": "Alter Table", + "items": [ + "advanced/alter-table/schema-change", + "advanced/alter-table/replace-table" + ] + }, + { + "type": "category", + "label": "Doris Partition", + "items": [ + "advanced/partition/table-temp-partition", + "advanced/partition/dynamic-partition" + ] + }, + { + "type": "category", + 
"label": "Join Optimization", + "items": [ + "advanced/join-optimization/runtime-filter", + "advanced/join-optimization/doris-join-optimization", + "advanced/join-optimization/colocation-join", + "advanced/join-optimization/bucket-shuffle-join" + ] + }, + { + "type": "category", + "label": "Date Cache", + "items": [ + "advanced/cache/partition-cache", + "advanced/cache/query-cache" + ] + }, + { + "type": "category", + "label": "Best Practice", + "items": [ + "advanced/best-practice/query-analysis", + "advanced/best-practice/debug-log", + "advanced/best-practice/import-analysis" + + ] + } + ] + }, + { + "type": "category", + "label": "Ecosystem", + "items": [ + { + "type": "category", + "label": "Expansion table", + "items": [ + "ecosystem/external-table/doris-on-es", + "ecosystem/external-table/hudi-external-table", + "ecosystem/external-table/iceberg-of-doris", + "ecosystem/external-table/odbc-of-doris", + "ecosystem/external-table/hive-of-doris" + ] + }, + "ecosystem/spark-doris-connector", + "ecosystem/flink-doris-connector", + "ecosystem/datax", + "ecosystem/mysql-to-doris", + "ecosystem/logstash", + "ecosystem/plugin-development-manual", + "ecosystem/audit-plugin", + "ecosystem/cloudcanal", + { + "type": "category", + "label": "Doris Manager", + "items": [ + "ecosystem/doris-manager/compiling-deploying", + "ecosystem/doris-manager/initializing", + "ecosystem/doris-manager/cluster-managenent", + "ecosystem/doris-manager/space-list", + "ecosystem/doris-manager/space-management", + "ecosystem/doris-manager/system-settings" + ] + }, + { + "type": "category", + "label": "SeaTunnel", + "items": [ + "ecosystem/seatunnel/flink-sink", + "ecosystem/seatunnel/spark-sink" + ] + }, + { + "type": "category", + "label": "UDF", + "items": [ + "ecosystem/udf/contribute-udf", + "ecosystem/udf/remote-user-defined-function", + "ecosystem/udf/native-user-defined-function", + "ecosystem/udf/java-user-defined-function" + ] + }, + { + "type": "category", + "label": "UDFA", + "items": [ + "ecosystem/udaf/remote-user-defined-aggregation-function" + ] + } + ] + }, + { + "type": "category", + "label": "SQL Manual", + "items": [ + { + "type": "category", + "label": "SQL Functions", + "items": [ + { + "type": "category", + "label": "Array Functions", + "items": [ + "sql-manual/sql-functions/array-functions/array_max", + "sql-manual/sql-functions/array-functions/array_remove", + "sql-manual/sql-functions/array-functions/array_slice", + "sql-manual/sql-functions/array-functions/array_sort", + "sql-manual/sql-functions/array-functions/array_position", + "sql-manual/sql-functions/array-functions/array_contains", + "sql-manual/sql-functions/array-functions/array_except", + "sql-manual/sql-functions/array-functions/array_product", + "sql-manual/sql-functions/array-functions/array_intersect", + "sql-manual/sql-functions/array-functions/arrays_overlap", + "sql-manual/sql-functions/array-functions/array_min", + "sql-manual/sql-functions/array-functions/element_at", + "sql-manual/sql-functions/array-functions/array_avg", + "sql-manual/sql-functions/array-functions/size", + "sql-manual/sql-functions/array-functions/array_distinct", + "sql-manual/sql-functions/array-functions/array_union", + "sql-manual/sql-functions/array-functions/array_sum" + ] + }, + { + "type": "category", + "label": "Date Functions", + "items": [ + "sql-manual/sql-functions/date-time-functions/dayname", + "sql-manual/sql-functions/date-time-functions/minute", + "sql-manual/sql-functions/date-time-functions/from_unixtime", + 
"sql-manual/sql-functions/date-time-functions/hour", + "sql-manual/sql-functions/date-time-functions/monthname", + "sql-manual/sql-functions/date-time-functions/date_sub", + "sql-manual/sql-functions/date-time-functions/yearweek", + "sql-manual/sql-functions/date-time-functions/unix_timestamp", + "sql-manual/sql-functions/date-time-functions/day", + "sql-manual/sql-functions/date-time-functions/curtime", + "sql-manual/sql-functions/date-time-functions/month", + "sql-manual/sql-functions/date-time-functions/week", + "sql-manual/sql-functions/date-time-functions/to_date", + "sql-manual/sql-functions/date-time-functions/timediff", + "sql-manual/sql-functions/date-time-functions/makedate", + "sql-manual/sql-functions/date-time-functions/dayofweek", + "sql-manual/sql-functions/date-time-functions/timestampadd", + "sql-manual/sql-functions/date-time-functions/from_days", + "sql-manual/sql-functions/date-time-functions/weekofyear", + "sql-manual/sql-functions/date-time-functions/year", + "sql-manual/sql-functions/date-time-functions/timestampdiff", + "sql-manual/sql-functions/date-time-functions/dayofmonth", + "sql-manual/sql-functions/date-time-functions/dayofyear", + "sql-manual/sql-functions/date-time-functions/date_format", + "sql-manual/sql-functions/date-time-functions/date_add", + "sql-manual/sql-functions/date-time-functions/curdate", + "sql-manual/sql-functions/date-time-functions/current_timestamp", + "sql-manual/sql-functions/date-time-functions/str_to_date", + "sql-manual/sql-functions/date-time-functions/weekday", + "sql-manual/sql-functions/date-time-functions/to_days", + "sql-manual/sql-functions/date-time-functions/datediff", + "sql-manual/sql-functions/date-time-functions/now", + "sql-manual/sql-functions/date-time-functions/time_round", + "sql-manual/sql-functions/date-time-functions/utc_timestamp", + "sql-manual/sql-functions/date-time-functions/convert_tz", + "sql-manual/sql-functions/date-time-functions/second" + ] + }, + { + "type": "category", + "label": "GIS Functions", + "items": [ + "sql-manual/sql-functions/spatial-functions/st_x", + "sql-manual/sql-functions/spatial-functions/st_y", + "sql-manual/sql-functions/spatial-functions/st_circle", + "sql-manual/sql-functions/spatial-functions/st_distance_sphere", + "sql-manual/sql-functions/spatial-functions/st_point", + "sql-manual/sql-functions/spatial-functions/st_polygon", + "sql-manual/sql-functions/spatial-functions/st_astext", + "sql-manual/sql-functions/spatial-functions/st_contains", + "sql-manual/sql-functions/spatial-functions/st_geometryfromtext", + "sql-manual/sql-functions/spatial-functions/st_linefromtext" + ] + }, + { + "type": "category", + "label": "String Functions", + "items": [ + "sql-manual/sql-functions/string-functions/concat_ws", + "sql-manual/sql-functions/string-functions/hex", + "sql-manual/sql-functions/string-functions/lower", + "sql-manual/sql-functions/string-functions/lpad", + "sql-manual/sql-functions/string-functions/substring", + "sql-manual/sql-functions/string-functions/instr", + "sql-manual/sql-functions/string-functions/repeat", + "sql-manual/sql-functions/string-functions/lcase", + "sql-manual/sql-functions/string-functions/replace", + "sql-manual/sql-functions/string-functions/rpad", + "sql-manual/sql-functions/string-functions/split_part", + "sql-manual/sql-functions/string-functions/append_trailing_char_if_absent", + "sql-manual/sql-functions/string-functions/ltrim", + "sql-manual/sql-functions/string-functions/left", + "sql-manual/sql-functions/string-functions/starts_with", + 
"sql-manual/sql-functions/string-functions/concat", + "sql-manual/sql-functions/string-functions/bit_length", + "sql-manual/sql-functions/string-functions/strleft", + "sql-manual/sql-functions/string-functions/strright", + "sql-manual/sql-functions/string-functions/money_format", + "sql-manual/sql-functions/string-functions/right", + "sql-manual/sql-functions/string-functions/substr", + "sql-manual/sql-functions/string-functions/find_in_set", + "sql-manual/sql-functions/string-functions/ascii", + "sql-manual/sql-functions/string-functions/reverse", + "sql-manual/sql-functions/string-functions/length", + "sql-manual/sql-functions/string-functions/unhex", + "sql-manual/sql-functions/string-functions/ends_with", + "sql-manual/sql-functions/string-functions/char_length", + "sql-manual/sql-functions/string-functions/null_or_empty", + "sql-manual/sql-functions/string-functions/locate", + { + "type": "category", + "label": "Fuzzy Match", + "items": [ + "sql-manual/sql-functions/string-functions/like/like", + "sql-manual/sql-functions/string-functions/like/not_like" + ] + }, + { + "type": "category", + "label": "Regular Match", + "items": [ + "sql-manual/sql-functions/string-functions/regexp/regexp", + "sql-manual/sql-functions/string-functions/regexp/regexp_extract", + "sql-manual/sql-functions/string-functions/regexp/regexp_replace", + "sql-manual/sql-functions/string-functions/regexp/not_regexp" + ] + } + ] + }, + { + "type": "category", + "label": "Aggregate Functions", + "items": [ + "sql-manual/sql-functions/aggregate-functions/collect_set", + "sql-manual/sql-functions/aggregate-functions/min", + "sql-manual/sql-functions/aggregate-functions/stddev_samp", + "sql-manual/sql-functions/aggregate-functions/avg", + "sql-manual/sql-functions/aggregate-functions/percentile", + "sql-manual/sql-functions/aggregate-functions/hll_union_agg", + "sql-manual/sql-functions/aggregate-functions/topn", + "sql-manual/sql-functions/aggregate-functions/count", + "sql-manual/sql-functions/aggregate-functions/sum", + "sql-manual/sql-functions/aggregate-functions/max_by", + "sql-manual/sql-functions/aggregate-functions/bitmap_union", + "sql-manual/sql-functions/aggregate-functions/percentile_approx", + "sql-manual/sql-functions/aggregate-functions/stddev", + "sql-manual/sql-functions/aggregate-functions/group_concat", + "sql-manual/sql-functions/aggregate-functions/collect_list", + "sql-manual/sql-functions/aggregate-functions/min_by", + "sql-manual/sql-functions/aggregate-functions/max", + "sql-manual/sql-functions/aggregate-functions/var_samp", + "sql-manual/sql-functions/aggregate-functions/approx_count_distinct", + "sql-manual/sql-functions/aggregate-functions/variance" + ] + }, + { + "type": "category", + "label": "Bitmap Functions", + "items": [ + "sql-manual/sql-functions/bitmap-functions/bitmap_and_not_count", + "sql-manual/sql-functions/bitmap-functions/bitmap_subset_limit", + "sql-manual/sql-functions/bitmap-functions/to_bitmap", + "sql-manual/sql-functions/bitmap-functions/bitmap_from_string", + "sql-manual/sql-functions/bitmap-functions/bitmap_or", + "sql-manual/sql-functions/bitmap-functions/bitmap_and", + "sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_union_count", + "sql-manual/sql-functions/bitmap-functions/bitmap_has_all", + "sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect", + "sql-manual/sql-functions/bitmap-functions/bitmap_not", + "sql-manual/sql-functions/bitmap-functions/bitmap_min", + "sql-manual/sql-functions/bitmap-functions/bitmap_contains", + 
"sql-manual/sql-functions/bitmap-functions/sub_bitmap", + "sql-manual/sql-functions/bitmap-functions/bitmap_union", + "sql-manual/sql-functions/bitmap-functions/bitmap_xor_count", + "sql-manual/sql-functions/bitmap-functions/bitmap_and_not", + "sql-manual/sql-functions/bitmap-functions/bitmap_xor", + "sql-manual/sql-functions/bitmap-functions/bitmap_or_count", + "sql-manual/sql-functions/bitmap-functions/orthogonal_bitmap_intersect_count", + "sql-manual/sql-functions/bitmap-functions/bitmap_has_any", + "sql-manual/sql-functions/bitmap-functions/bitmap_intersect", + "sql-manual/sql-functions/bitmap-functions/bitmap_to_string", + "sql-manual/sql-functions/bitmap-functions/bitmap_hash", + "sql-manual/sql-functions/bitmap-functions/intersect_count", + "sql-manual/sql-functions/bitmap-functions/bitmap_empty", + "sql-manual/sql-functions/bitmap-functions/bitmap_max", + "sql-manual/sql-functions/bitmap-functions/bitmap_and_count", + "sql-manual/sql-functions/bitmap-functions/bitmap_subset_in_range" + ] + }, + { + "type": "category", + "label": "Bitwise Functions", + "items": [ + "sql-manual/sql-functions/bitwise-functions/bitand", + "sql-manual/sql-functions/bitwise-functions/bitor", + "sql-manual/sql-functions/bitwise-functions/bitxor", + "sql-manual/sql-functions/bitwise-functions/bitnot" + ] + }, + { + "type": "category", + "label": "Condition Functions", + "items": [ + "sql-manual/sql-functions/conditional-functions/case", + "sql-manual/sql-functions/conditional-functions/coalesce", + "sql-manual/sql-functions/conditional-functions/if", + "sql-manual/sql-functions/conditional-functions/ifnull", + "sql-manual/sql-functions/conditional-functions/nvl", + "sql-manual/sql-functions/conditional-functions/nullif" + ] + }, + { + "type": "category", + "label": "JSON Functions", + "items": [ + "sql-manual/sql-functions/json-functions/get_json_double", + "sql-manual/sql-functions/json-functions/get_json_int", + "sql-manual/sql-functions/json-functions/get_json_string", + "sql-manual/sql-functions/json-functions/json_array", + "sql-manual/sql-functions/json-functions/json_object", + "sql-manual/sql-functions/json-functions/json_quote" + ] + }, + { + "type": "category", + "label": "Hash Functions", + "items": [ + "sql-manual/sql-functions/hash-functions/murmur_hash3_32" + ] + }, + { + "type": "category", + "label": "Math Functions", + "items": [ + "sql-manual/sql-functions/math-functions/conv", + "sql-manual/sql-functions/math-functions/pmod" + ] + }, + { + "type": "category", + "label": "Encryption Functions", + "items": [ + "sql-manual/sql-functions/encrypt-digest-functions/aes", + "sql-manual/sql-functions/encrypt-digest-functions/md5", + "sql-manual/sql-functions/encrypt-digest-functions/md5sum", + "sql-manual/sql-functions/encrypt-digest-functions/sm4", + "sql-manual/sql-functions/encrypt-digest-functions/sm3", + "sql-manual/sql-functions/encrypt-digest-functions/sm3sum" + ] + }, + { + "type": "category", + "label": "Table Functions", + "items": [ + "sql-manual/sql-functions/table-functions/explode-json-array", + "sql-manual/sql-functions/table-functions/explode", + "sql-manual/sql-functions/table-functions/explode-split", + "sql-manual/sql-functions/table-functions/explode-bitmap", + "sql-manual/sql-functions/table-functions/outer-combinator", + "sql-manual/sql-functions/table-functions/numbers", + "sql-manual/sql-functions/table-functions/explode-numbers" + ] + }, + { + "type": "category", + "label": "Analytic(Window) Functions", + "items": [ + 
"sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-LAG", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-SUM", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-LAST-VALUE", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-AVG", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-MIN", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-COUNT", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-RANK", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-DENSE-RANK", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-MAX", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-FIRST-VALUE", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-LEAD", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-ROW-NUMBER", + "sql-manual/sql-functions/window-functions/WINDOW-FUNCTION-NTILE" + ] + }, + "sql-manual/sql-functions/cast", + "sql-manual/sql-functions/digital-masking" + ] + }, + { + "type": "category", + "label": "SQL Reference", + "items": [ + { + "type": "category", + "label": "Account Management", + "items": [ + "sql-manual/sql-reference/Account-Management-Statements/SET-PROPERTY", + "sql-manual/sql-reference/Account-Management-Statements/REVOKE", + "sql-manual/sql-reference/Account-Management-Statements/GRANT", + "sql-manual/sql-reference/Account-Management-Statements/LDAP", + "sql-manual/sql-reference/Account-Management-Statements/CREATE-ROLE", + "sql-manual/sql-reference/Account-Management-Statements/DROP-ROLE", + "sql-manual/sql-reference/Account-Management-Statements/CREATE-USER", + "sql-manual/sql-reference/Account-Management-Statements/DROP-USER", + "sql-manual/sql-reference/Account-Management-Statements/SET-PASSWORD" + ] + }, + { + "type": "category", + "label": "Cluster management", + "items": [ + "sql-manual/sql-reference/Cluster-Management-Statements/ALTER-SYSTEM-DROP-FOLLOWER", + "sql-manual/sql-reference/Cluster-Management-Statements/ALTER-SYSTEM-DECOMMISSION-BACKEND", + "sql-manual/sql-reference/Cluster-Management-Statements/ALTER-SYSTEM-DROP-OBSERVER", + "sql-manual/sql-reference/Cluster-Management-Statements/CANCEL-ALTER-SYSTEM", + "sql-manual/sql-reference/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BROKER", + "sql-manual/sql-reference/Cluster-Management-Statements/ALTER-SYSTEM-ADD-OBSERVER", + "sql-manual/sql-reference/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BACKEND", + "sql-manual/sql-reference/Cluster-Management-Statements/ALTER-SYSTEM-ADD-FOLLOWER", + "sql-manual/sql-reference/Cluster-Management-Statements/ALTER-SYSTEM-MODIFY-BROKER", + "sql-manual/sql-reference/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BROKER", + "sql-manual/sql-reference/Cluster-Management-Statements/ALTER-SYSTEM-ADD-BACKEND", + "sql-manual/sql-reference/Cluster-Management-Statements/ALTER-SYSTEM-DROP-BACKEND" + ] + }, + { + "type": "category", + "label": "DDL", + "items": [ + { + "type": "category", + "label": "Alter", + "items": [ + "sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-DATABASE", + "sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-BITMAP", + "sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-PARTITION", + "sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-COLUMN", + "sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-RESOURCE", + 
"sql-manual/sql-reference/Data-Definition-Statements/Alter/CANCEL-ALTER-TABLE", + "sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-COMMENT", + "sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-VIEW", + "sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-SQL-BLOCK-RULE", + "sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-REPLACE", + "sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-PROPERTY", + "sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-ROLLUP", + "sql-manual/sql-reference/Data-Definition-Statements/Alter/ALTER-TABLE-RENAME" + ] + }, + { + "type": "category", + "label": "Backup and Restore", + "items": [ + "sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/RESTORE", + "sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/DROP-REPOSITORY", + "sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CANCEL-RESTORE", + "sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/BACKUP", + "sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CANCEL-BACKUP", + "sql-manual/sql-reference/Data-Definition-Statements/Backup-and-Restore/CREATE-REPOSITORY" + ] + }, + { + "type": "category", + "label": "Create", + "items": [ + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-ENCRYPT-KEY", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-TABLE-AS-SELECT", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-POLICY", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-VIEW", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-DATABASE", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-FILE", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-INDEX", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-RESOURCE", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-TABLE-LIKE", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-MATERIALIZED-VIEW", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-EXTERNAL-TABLE", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-TABLE", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-SQL-BLOCK-RULE", + "sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-FUNCTION" + ] + }, + { + "type": "category", + "label": "Drop", + "items": [ + "sql-manual/sql-reference/Data-Definition-Statements/Drop/DROP-INDEX", + "sql-manual/sql-reference/Data-Definition-Statements/Drop/DROP-RESOURCE", + "sql-manual/sql-reference/Data-Definition-Statements/Drop/DROP-FILE", + "sql-manual/sql-reference/Data-Definition-Statements/Drop/DROP-ENCRYPT-KEY", + "sql-manual/sql-reference/Data-Definition-Statements/Drop/DROP-DATABASE", + "sql-manual/sql-reference/Data-Definition-Statements/Drop/DROP-MATERIALIZED-VIEW", + "sql-manual/sql-reference/Data-Definition-Statements/Drop/DROP-POLICY", + "sql-manual/sql-reference/Data-Definition-Statements/Drop/TRUNCATE-TABLE", + "sql-manual/sql-reference/Data-Definition-Statements/Drop/DROP-TABLE", + "sql-manual/sql-reference/Data-Definition-Statements/Drop/DROP-FUNCTION", + "sql-manual/sql-reference/Data-Definition-Statements/Drop/DROP-SQL-BLOCK-RULE" + ] + } + ] + }, + { + "type": "category", + "label": "DML", + "items": [ + { + "type": "category", + "label": "Load", + "items": [ + 
"sql-manual/sql-reference/Data-Manipulation-Statements/Load/PAUSE-ROUTINE-LOAD", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/MULTI-LOAD", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/RESUME-SYNC-JOB", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/STOP-ROUTINE-LOAD", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/CLEAN-LABEL", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/ALTER-ROUTINE-LOAD", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/CANCEL-LOAD", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/RESUME-ROUTINE-LOAD", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/STOP-SYNC-JOB", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/PAUSE-SYNC-JOB", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/CREATE-SYNC-JOB", + "sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD" + ] + }, + { + "type": "category", + "label": "Manipulation", + "items": [ + "sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/INSERT", + "sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/SELECT", + "sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/DELETE", + "sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/UPDATE", + "sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/EXPORT" + ] + }, + "sql-manual/sql-reference/Data-Manipulation-Statements/OUTFILE" + ] + }, + { + "type": "category", + "label": "Database Administration", + "items": [ + "sql-manual/sql-reference/Database-Administration-Statements/ADMIN-DIAGNOSE-TABLET", + "sql-manual/sql-reference/Database-Administration-Statements/ADMIN-SHOW-CONFIG", + "sql-manual/sql-reference/Database-Administration-Statements/KILL", + "sql-manual/sql-reference/Database-Administration-Statements/ADMIN-CHECK-TABLET", + "sql-manual/sql-reference/Database-Administration-Statements/ADMIN-CLEAN-TRASH", + "sql-manual/sql-reference/Database-Administration-Statements/ENABLE-FEATURE", + "sql-manual/sql-reference/Database-Administration-Statements/RECOVER", + "sql-manual/sql-reference/Database-Administration-Statements/UNINSTALL-PLUGIN", + "sql-manual/sql-reference/Database-Administration-Statements/ADMIN-SET-REPLICA-STATUS", + "sql-manual/sql-reference/Database-Administration-Statements/ADMIN-SHOW-REPLICA-DISTRIBUTION", + "sql-manual/sql-reference/Database-Administration-Statements/INSTALL-PLUGIN", + "sql-manual/sql-reference/Database-Administration-Statements/ADMIN-REPAIR-TABLE", + "sql-manual/sql-reference/Database-Administration-Statements/ADMIN-CANCEL-REPAIR", + "sql-manual/sql-reference/Database-Administration-Statements/SET-VARIABLE", + "sql-manual/sql-reference/Database-Administration-Statements/ADMIN-SET-CONFIG", + "sql-manual/sql-reference/Database-Administration-Statements/ADMIN-SHOW-TABLET-STORAGE-FORMAT", + "sql-manual/sql-reference/Database-Administration-Statements/ADMIN-SHOW-REPLICA-STATUS" + ] + }, + { + "type": "category", + "label": "Show", + "items": [ + "sql-manual/sql-reference/Show-Statements/SHOW-DATABASES", + "sql-manual/sql-reference/Show-Statements/SHOW-LAST-INSERT", + "sql-manual/sql-reference/Show-Statements/SHOW-BACKUP", + "sql-manual/sql-reference/Show-Statements/SHOW-MIGRATIONS", + "sql-manual/sql-reference/Show-Statements/SHOW-PARTITION-ID", + 
"sql-manual/sql-reference/Show-Statements/SHOW-ALTER-TABLE-MATERIALIZED-VIEW", + "sql-manual/sql-reference/Show-Statements/SHOW-SNAPSHOT", + "sql-manual/sql-reference/Show-Statements/SHOW-FUNCTIONS", + "sql-manual/sql-reference/Show-Statements/SHOW-ROLLUP", + "sql-manual/sql-reference/Show-Statements/SHOW-ENGINES", + "sql-manual/sql-reference/Show-Statements/SHOW-DELETE", + "sql-manual/sql-reference/Show-Statements/SHOW-SQL-BLOCK-RULE", + "sql-manual/sql-reference/Show-Statements/SHOW-CREATE-FUNCTION", + "sql-manual/sql-reference/Show-Statements/SHOW-ROUTINE-LOAD", + "sql-manual/sql-reference/Show-Statements/SHOW-SYNC-JOB", + "sql-manual/sql-reference/Show-Statements/SHOW-WHITE-LIST", + "sql-manual/sql-reference/Show-Statements/SHOW-WARNING", + "sql-manual/sql-reference/Show-Statements/SHOW-DATA-SKEW", + "sql-manual/sql-reference/Show-Statements/SHOW-DATABASE-ID", + "sql-manual/sql-reference/Show-Statements/SHOW-DYNAMIC-PARTITION", + "sql-manual/sql-reference/Show-Statements/SHOW-TABLET", + "sql-manual/sql-reference/Show-Statements/SHOW-VARIABLES", + "sql-manual/sql-reference/Show-Statements/SHOW-CREATE-ROUTINE-LOAD", + "sql-manual/sql-reference/Show-Statements/SHOW-PLUGINS", + "sql-manual/sql-reference/Show-Statements/SHOW-EVENTS", + "sql-manual/sql-reference/Show-Statements/SHOW-LOAD-WARNINGS", + "sql-manual/sql-reference/Show-Statements/SHOW-ROLES", + "sql-manual/sql-reference/Show-Statements/SHOW-GRANTS", + "sql-manual/sql-reference/Show-Statements/SHOW-INDEX", + "sql-manual/sql-reference/Show-Statements/SHOW-EXPORT", + "sql-manual/sql-reference/Show-Statements/SHOW-PROCEDURE", + "sql-manual/sql-reference/Show-Statements/SHOW-ROUTINE-LOAD-TASK", + "sql-manual/sql-reference/Show-Statements/SHOW-BACKENDS", + "sql-manual/sql-reference/Show-Statements/SHOW-PROC", + "sql-manual/sql-reference/Show-Statements/SHOW-COLLATION", + "sql-manual/sql-reference/Show-Statements/SHOW-TABLE-STATUS", + "sql-manual/sql-reference/Show-Statements/SHOW-REPOSITORIES", + "sql-manual/sql-reference/Show-Statements/SHOW-CREATE-DATABASE", + "sql-manual/sql-reference/Show-Statements/SHOW-CREATE-MATERIALIZED-VIEW", + "sql-manual/sql-reference/Show-Statements/SHOW-QUERY-PROFILE", + "sql-manual/sql-reference/Show-Statements/SHOW-OPEN-TABLES", + "sql-manual/sql-reference/Show-Statements/SHOW-TABLETS", + "sql-manual/sql-reference/Show-Statements/SHOW-LOAD", + "sql-manual/sql-reference/Show-Statements/SHOW-TABLES", + "sql-manual/sql-reference/Show-Statements/SHOW-RESOURCES", + "sql-manual/sql-reference/Show-Statements/SHOW-PARTITIONS", + "sql-manual/sql-reference/Show-Statements/SHOW-FRONTENDS", + "sql-manual/sql-reference/Show-Statements/SHOW-RESTORE", + "sql-manual/sql-reference/Show-Statements/SHOW-DATA", + "sql-manual/sql-reference/Show-Statements/SHOW-PROPERTY", + "sql-manual/sql-reference/Show-Statements/SHOW-BROKER", + "sql-manual/sql-reference/Show-Statements/SHOW-TRIGGERS", + "sql-manual/sql-reference/Show-Statements/SHOW-PROCESSLIST", + "sql-manual/sql-reference/Show-Statements/SHOW-ENCRYPT-KEY", + "sql-manual/sql-reference/Show-Statements/SHOW-COLUMNS", + "sql-manual/sql-reference/Show-Statements/SHOW-TRASH", + "sql-manual/sql-reference/Show-Statements/SHOW-VIEW", + "sql-manual/sql-reference/Show-Statements/SHOW-TRANSACTION", + "sql-manual/sql-reference/Show-Statements/SHOW-FILE", + "sql-manual/sql-reference/Show-Statements/SHOW-STREAM-LOAD", + "sql-manual/sql-reference/Show-Statements/SHOW-STATUS", + "sql-manual/sql-reference/Show-Statements/SHOW-LOAD-PROFILE", + 
"sql-manual/sql-reference/Show-Statements/SHOW-TABLE-ID", + "sql-manual/sql-reference/Show-Statements/SHOW-ALTER", + "sql-manual/sql-reference/Show-Statements/SHOW-SMALL-FILES", + "sql-manual/sql-reference/Show-Statements/SHOW-CREATE-TABLE", + "sql-manual/sql-reference/Show-Statements/SHOW-CHARSET" + ] + }, + { + "type": "category", + "label": "Data Types", + "items": [ + "sql-manual/sql-reference/Data-Types/VARCHAR", + "sql-manual/sql-reference/Data-Types/INT", + "sql-manual/sql-reference/Data-Types/DATE", + "sql-manual/sql-reference/Data-Types/BITMAP", + "sql-manual/sql-reference/Data-Types/DOUBLE", + "sql-manual/sql-reference/Data-Types/STRING", + "sql-manual/sql-reference/Data-Types/HLL", + "sql-manual/sql-reference/Data-Types/ARRAY", + "sql-manual/sql-reference/Data-Types/DATETIME", + "sql-manual/sql-reference/Data-Types/LARGEINT", + "sql-manual/sql-reference/Data-Types/QUANTILE_STATE", + "sql-manual/sql-reference/Data-Types/SMALLINT", + "sql-manual/sql-reference/Data-Types/TINYINT", + "sql-manual/sql-reference/Data-Types/DECIMAL", + "sql-manual/sql-reference/Data-Types/BIGINT", + "sql-manual/sql-reference/Data-Types/BOOLEAN", + "sql-manual/sql-reference/Data-Types/FLOAT", + "sql-manual/sql-reference/Data-Types/CHAR" + ] + }, + { + "type": "category", + "label": "Utility", + "items": [ + "sql-manual/sql-reference/Utility-Statements/HELP", + "sql-manual/sql-reference/Utility-Statements/USE", + "sql-manual/sql-reference/Utility-Statements/DESCRIBE" + ] + } + ] + } + ] + }, + { + "type": "category", + "label": "Admin Manual", + "items": [ + { + "type": "category", + "label": "cluster management", + "items": [ + "admin-manual/cluster-management/upgrade", + "admin-manual/cluster-management/elastic-expansion", + "admin-manual/cluster-management/load-balancing" + ] + }, + { + "type": "category", + "label": "Data Admin", + "items": [ + "admin-manual/data-admin/backup", + "admin-manual/data-admin/restore", + "admin-manual/data-admin/delete-recover" + ] + }, + "admin-manual/sql-interception", + "admin-manual/query-profile", + "admin-manual/tracing", + "admin-manual/optimization", + { + "type": "category", + "label": "Maintenance and Monitor", + "items": [ + { + "type": "category", + "label": "Monitor Metrics", + "items": [ + "admin-manual/maint-monitor/monitor-metrics/metrics" + ] + }, + "admin-manual/maint-monitor/disk-capacity", + "admin-manual/maint-monitor/tablet-repair-and-balance", + "admin-manual/maint-monitor/be-olap-error-code", + "admin-manual/maint-monitor/doris-error-code", + "admin-manual/maint-monitor/tablet-meta-tool", + "admin-manual/maint-monitor/monitor-alert", + "admin-manual/maint-monitor/multi-tenant", + "admin-manual/maint-monitor/tablet-local-debug", + "admin-manual/maint-monitor/tablet-restore-tool", + "admin-manual/maint-monitor/monitor-metrics/metrics", + "admin-manual/maint-monitor/metadata-operation" + ] + }, + { + "type": "category", + "label": "Config", + "items": [ + "admin-manual/config/fe-config", + "admin-manual/config/be-config", + "admin-manual/config/user-property" + ] + }, + { + "type": "category", + "label": "User Privilege and Ldap", + "items": [ + "admin-manual/privilege-ldap/user-privilege", + "admin-manual/privilege-ldap/ldap" + ] + }, + "admin-manual/multi-tenant", + { + "type": "category", + "label": "HTTP API", + "items": [ + { + "type": "category", + "label": "FE", + "items": [ + { + "type": "category", + "label": "MANAGER", + "items": [ + "admin-manual/http-actions/fe/manager/query-profile-action", + 
"admin-manual/http-actions/fe/manager/node-action", + "admin-manual/http-actions/fe/manager/cluster-action" + ] + }, + "admin-manual/http-actions/fe/get-load-state", + "admin-manual/http-actions/fe/bootstrap-action", + "admin-manual/http-actions/fe/ha-action", + "admin-manual/http-actions/fe/meta-replay-state-action", + "admin-manual/http-actions/fe/statement-execution-action", + "admin-manual/http-actions/fe/profile-action", + "admin-manual/http-actions/fe/meta-info-action", + "admin-manual/http-actions/fe/meta-action", + "admin-manual/http-actions/fe/query-profile-action", + "admin-manual/http-actions/fe/show-data-action", + "admin-manual/http-actions/fe/config-action", + "admin-manual/http-actions/fe/get-log-file-action", + "admin-manual/http-actions/fe/connection-action", + "admin-manual/http-actions/fe/system-action", + "admin-manual/http-actions/fe/table-schema-action", + "admin-manual/http-actions/fe/show-meta-info-action", + "admin-manual/http-actions/fe/hardware-info-action", + "admin-manual/http-actions/fe/log-action", + "admin-manual/http-actions/fe/cancel-load-action", + "admin-manual/http-actions/fe/logout-action", + "admin-manual/http-actions/fe/backends-action", + "admin-manual/http-actions/fe/get-load-info-action", + "admin-manual/http-actions/fe/show-runtime-info-action", + "admin-manual/http-actions/fe/query-detail-action", + "admin-manual/http-actions/fe/upload-action", + "admin-manual/http-actions/fe/session-action", + "admin-manual/http-actions/fe/table-row-count-action", + "admin-manual/http-actions/fe/get-small-file", + "admin-manual/http-actions/fe/table-query-plan-action", + "admin-manual/http-actions/fe/set-config-action", + "admin-manual/http-actions/fe/row-count-action", + "admin-manual/http-actions/fe/get-ddl-stmt-action", + "admin-manual/http-actions/fe/show-proc-action", + "admin-manual/http-actions/fe/check-decommission-action", + "admin-manual/http-actions/fe/health-action", + "admin-manual/http-actions/fe/check-storage-type-action" + ] + }, + "admin-manual/http-actions/restore-tablet", + "admin-manual/http-actions/get-load-state", + "admin-manual/http-actions/tablet-migration-action", + "admin-manual/http-actions/cancel-label", + "admin-manual/http-actions/profile-action", + "admin-manual/http-actions/show-data-action", + "admin-manual/http-actions/tablets_distribution", + "admin-manual/http-actions/connection-action", + "admin-manual/http-actions/compaction-action", + "admin-manual/http-actions/query-detail-action", + "admin-manual/http-actions/get-tablets", + "admin-manual/http-actions/fe-get-log-file", + "admin-manual/http-actions/check-reset-rpc-cache", + "admin-manual/http-actions/check-tablet-segment-action" + ] + } + ] + }, + { + "type": "category", + "label": "FAQ", + "items": [ + "faq/install-faq", + "faq/data-faq", + "faq/sql-faq" + ] + } + ] +} diff --git a/docs/sidebarsCommunity.json b/docs/sidebarsCommunity.json new file mode 100644 index 0000000000..54dfa337f6 --- /dev/null +++ b/docs/sidebarsCommunity.json @@ -0,0 +1,68 @@ +{ + "community": [ + "team", + "gitter", + "subscribe-mail-list", + "feedback", + { + "type": "category", + "label": "How to Contribute", + "items": [ + "how-to-contribute/how-to-contribute", + "how-to-contribute/contributor-guide", + "how-to-contribute/how-to-be-a-committer", + "how-to-contribute/commit-format-specification", + "how-to-contribute/pull-request", + "how-to-contribute/contribute-doc" + ] + }, + { + "type": "category", + "label": "Release Process & Verification", + "items": [ + 
"release-and-verify/release-prepare", + "release-and-verify/release-doris-core", + "release-and-verify/release-doris-connectors", + "release-and-verify/release-doris-manager", + "release-and-verify/release-complete", + "release-and-verify/release-verify" + ] + }, + "security", + { + "type": "category", + "label": "Design Documents", + "items": [ + "design/doris_storage_optimization", + "design/grouping_sets_design", + "design/metadata-design", + "design/spark_load" + ] + }, + { + "type": "category", + "label": "Developer Guide", + "items": [ + "developer-guide/debug-tool", + "developer-guide/docker-dev", + "developer-guide/benchmark-tool", + "developer-guide/fe-eclipse-dev", + "developer-guide/fe-idea-dev", + "developer-guide/fe-vscode-dev", + "developer-guide/be-vscode-dev", + "developer-guide/java-format-code", + "developer-guide/cpp-format-code", + "developer-guide/cpp-diagnostic-code", + "developer-guide/how-to-share-blogs", + "developer-guide/bitmap-hll-file-format", + "developer-guide/github-checks", + "developer-guide/regression-testing" + ] + }, + { + "type": "link", + "href": "https://cwiki.apache.org/confluence/display/DORIS/Doris+Improvement+Proposals", + "label" : "Improvement Proposals" + } + ] +} \ No newline at end of file diff --git a/docs/zh-CN/community/design/flink_doris_connector_design.md b/docs/zh-CN/community/design/flink_doris_connector_design.md deleted file mode 100644 index ca91982d37..0000000000 --- a/docs/zh-CN/community/design/flink_doris_connector_design.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -{ - "title": "Flink Doris Connector设计方案", - "language": "zh-CN" -} - ---- - -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under - -# Flink Doris Connector设计方案 - -该方案首先感谢社区Spark Doris Connector的作者 - -从Doris角度看,将其数据引入Flink,可以使用Flink一系列丰富的生态产品,拓宽了产品的想象力,也使得Doris和其他数据源的联合查询成为可能 - -从我们业务架构出发和业务需求,我们选择了Flink作为我们架构的一部分,用于数据的ETL及实时计算框架,社区目前支持Spark doris connector,因此我们参照Spark doris connector 设计开发了Flink doris Connector。 - -## 技术选型 - -一开始我们选型的时候,也是和Spark Doris Connector 一样,开始考虑的是JDBC的方式,但是这种方式就像Spark doris connector那篇文章中说的,有优点,但是缺点更明显。后来我们阅读及测试了Spark的代码,决定站在巨人的肩上来实现(备注:直接拷贝代码修改)。 - -以下内容来自Spark Doris Connector博客的,直接拷贝了 - -``` -于是我们开发了针对Doris的新的Datasource,Spark-Doris-Connector。这种方案下,Doris可以暴露Doris数据分布给Spark。Spark的Driver访问Doris的FE获取Doris表的Schema和底层数据分布。之后,依据此数据分布,合理分配数据查询任务给Executors。最后,Spark的Executors分别访问不同的BE进行查询。大大提升了查询的效率 -``` - -## 使用方法 - -在Doris的代码库的 extension/flink-doris-connector/ 目录下编译生成doris-flink-1.0.0-SNAPSHOT.jar,将这个jar包加入flink的ClassPath中,即可使用Flink-on-Doris功能了 - -#### SQL方式 - -支持功能: - -1. 支持通过Flink SQL方式读取Doris数仓里表的数据到Flink里进行计算 -2. 支持通过Flink SQL将数据insert 到数仓对应的表中,后端实现是通过Stream Load直接和BE进行通讯完成数据插入操作 -3. 
可以通过Flink关联非doris的外部数据源表进行关联分析 - -示例: - -```java - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); - final StreamTableEnvironment tEnv = StreamTableEnvironment.create(env); - tEnv.executeSql( - "CREATE TABLE test_aggregation01 (" + - "user_id STRING," + - "user_city STRING," + - "age INT," + - "last_visit_date STRING" + - ") " + - "WITH (\n" + - " 'connector' = 'doris',\n" + - " 'fenodes' = 'doris01:8030',\n" + - " 'table.identifier' = 'demo.test_aggregation',\n" + - " 'username' = 'root',\n" + - " 'password' = ''\n" + - ")"); - tEnv.executeSql( - "CREATE TABLE test_aggregation02 (" + - "user_id STRING," + - "user_city STRING," + - "age INT," + - "last_visit_date STRING" + - ") " + - "WITH (\n" + - " 'connector' = 'doris',\n" + - " 'fenodes' = 'doris01:8030',\n" + - " 'table.identifier' = 'demo.test_aggregation_01',\n" + - " 'username' = 'root',\n" + - " 'password' = ''\n" + - ")"); - - tEnv.executeSql("INSERT INTO test_aggregation02 select * from test_aggregation01"); - tEnv.executeSql("select count(1) from test_aggregation01"); -``` - -#### DataStream方式 - -```java -DorisOptions.Builder options = DorisOptions.builder() - .setFenodes("$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT") - .setUsername("$YOUR_DORIS_USERNAME") - .setPassword("$YOUR_DORIS_PASSWORD") - .setTableIdentifier("$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME"); -env.addSource(new DorisSourceFunction<>(options.build(),new SimpleListDeserializationSchema())).print(); -``` - -## 适用场景 - - - -![1616987965864](/images/Flink-doris-connector.png) - - - -#### 1.使用Flink对Doris中的数据和其他数据源进行联合分析 - -很多业务部门会将自己的数据放在不同的存储系统上,比如一些在线分析、报表的数据放在Doris中,一些结构化检索数据放在Elasticsearch中、一些需要事物的数据放在MySQL中,等等。业务往往需要跨多个存储源进行分析,通过Flink Doris Connector打通Flink和Doris后,业务可以直接使用Flink,将Doris中的数据与多个外部数据源做联合查询计算。 - -#### 2.实时数据接入 - -Flink Doris Connector之前:针对业务不规则数据,经常需要针对消息做规范处理,空值过滤等写入新的topic,然后再启动Routine load写入Doris。 - -![1616988281677](/images/Flink-doris-connector1.png) - -Flink Doris Connector之后:flink读取kafka,直接写入doris。 - -![1616988514873](/images/Flink-doris-connector2.png) - - - -## 技术实现 - -### 架构图 - -![1616997396610](/images/Flink-doris-connector-architecture.png) - - - -### Doris对外提供更多能力 - -#### Doris FE - -对外开放了获取内部表的元数据信息、单表查询规划和部分统计信息的接口。 - -所有的Rest API接口都需要进行HttpBasic认证,用户名和密码是登录数据库的用户名和密码,需要注意权限的正确分配。 - -``` -// 获取表schema元信息 -GET api/{database}/{table}/_schema - -// 获取对单表的查询规划模版 -POST api/{database}/{table}/_query_plan -{ -"sql": "select k1, k2 from {database}.{table}" -} - -// 获取表大小 -GET api/{database}/{table}/_count -``` - -#### Doris BE - - -通过Thrift协议,直接对外提供数据的过滤、扫描和裁剪能力。 - -``` -service TDorisExternalService { - // 初始化查询执行器 -TScanOpenResult open_scanner(1: TScanOpenParams params); - -// 流式batch获取数据,Apache Arrow数据格式 - TScanBatchResult get_next(1: TScanNextBatchParams params); - -// 结束扫描 - TScanCloseResult close_scanner(1: TScanCloseParams params); -} -``` - -Thrift相关结构体定义可参考: - -https://github.com/apache/incubator-doris/blob/master/gensrc/thrift/DorisExternalService.thrift - - - -### 实现DataStream - -继承 org.apache.flink.streaming.api.functions.source.RichSourceFunction ,自定义DorisSourceFunction,初始化时,获取相关表的执行计划,获取对应的分区。 - -重写run方法,循环从分区中读取数据。 - -```java -public void run(SourceContext sourceContext){ - //循环读取各分区 - for(PartitionDefinition partitions : dorisPartitions){ - scalaValueReader = new ScalaValueReader(partitions, settings); - while (scalaValueReader.hasNext()){ - Object next = scalaValueReader.next(); - sourceContext.collect(next); - } - } -} -``` - - - -### 实现Flink 
SQL on Doris - -参考了[Flink自定义Source&Sink](https://ci.apache.org/projects/flink/flink-docs-stable/zh/dev/table/sourceSinks.html) 和 Flink-jdbc-connector,实现了下面的效果,可以实现用Flink SQL直接操作Doris的表,包括读和写。 - -#### 实现细节 - -1.实现DynamicTableSourceFactory , DynamicTableSinkFactory 注册 doris connector - -2.自定义DynamicTableSource和DynamicTableSink 生成逻辑计划 - -3.DorisRowDataInputFormat和DorisDynamicOutputFormat获取到逻辑计划后开始执行。 - -![1616747472136](/images/table_connectors.svg) - - - -实现中最主要的是基于RichInputFormat和RichOutputFormat 定制的DorisRowDataInputFormat和DorisDynamicOutputFormat。 - -在DorisRowDataInputFormat中,将获取到的dorisPartitions 在createInputSplits中 切分成多个分片,用于并行计算。 - -```java -public DorisTableInputSplit[] createInputSplits(int minNumSplits) { - List<DorisTableInputSplit> dorisSplits = new ArrayList<>(); - int splitNum = 0; - for (PartitionDefinition partition : dorisPartitions) { - dorisSplits.add(new DorisTableInputSplit(splitNum++,partition)); - } - return dorisSplits.toArray(new DorisTableInputSplit[0]); -} - - -public RowData nextRecord(RowData reuse) { - if (!hasNext) { - //已经读完数据,返回null - return null; - } - List next = (List)scalaValueReader.next(); - GenericRowData genericRowData = new GenericRowData(next.size()); - for(int i =0;i<next.size();i++){ - genericRowData.setField(i, next.get(i)); - } - //判断是否还有数据 - hasNext = scalaValueReader.hasNext(); - return genericRowData; -} - -``` - - - -在DorisRowDataOutputFormat中,通过streamload的方式向doris中写数据。streamload程序参考org.apache.doris.plugin.audit.DorisStreamLoader - -```java -public void writeRecord(RowData row) throws IOException { - //streamload 默认分隔符 \t - StringJoiner value = new StringJoiner("\t"); - GenericRowData rowData = (GenericRowData) row; - for(int i = 0; i < row.getArity(); ++i) { - value.add(rowData.getField(i).toString()); - } - //streamload 写数据 - DorisStreamLoad.LoadResponse loadResponse = dorisStreamLoad.loadBatch(value.toString()); - System.out.println(loadResponse); -} -``` - - diff --git a/docs/zh-CN/docs/advanced/alter-table/replace-table.md b/docs/zh-CN/docs/advanced/alter-table/replace-table.md new file mode 100644 index 0000000000..b17fa4ec2f --- /dev/null +++ b/docs/zh-CN/docs/advanced/alter-table/replace-table.md @@ -0,0 +1,71 @@ +--- +{ + "title": "替换表", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. 
+--> + +# 替换表 + +在 0.14 版本中,Doris 支持对两个表进行原子的替换操作。 该操作仅适用于 OLAP 表。 + +分区级别的替换操作,请参阅 [临时分区文档](../partition/table-tmp-partition.md) + +## 语法说明 + +```text +ALTER TABLE [db.]tbl1 REPLACE WITH TABLE tbl2 +[PROPERTIES('swap' = 'true')]; +``` + +将表 tbl1 替换为表 tbl2。 + +如果 `swap` 参数为 `true`,则替换后,名称为 `tbl1` 表中的数据为原 `tbl2` 表中的数据。而名称为 `tbl2` 表中的数据为原 `tbl1` 表中的数据。即两张表数据发生了互换。 + +如果 `swap` 参数为 `false`,则替换后,名称为 `tbl1` 表中的数据为原 `tbl2` 表中的数据。而名称为 `tbl2` 表被删除。 + +## 原理 + +替换表功能,实际上是将以下操作集合变成一个原子操作。 + +假设要将表 A 替换为表 B,且 `swap` 为 `true`,则操作如下: + +1. 将表 B 重名为表 A。 +2. 将表 A 重名为表 B。 + +如果 `swap` 为 `false`,则操作如下: + +1. 删除表 A。 +2. 将表 B 重名为表 A。 + +## 注意事项 + +1. `swap` 参数默认为 `true`。即替换表操作相当于将两张表数据进行交换。 +2. 如果设置 `swap` 参数为 `false`,则被替换的表(表A)将被删除,且无法恢复。 +3. 替换操作仅能发生在两张 OLAP 表之间,且不会检查两张表的表结构是否一致。 +4. 替换操作不会改变原有的权限设置。因为权限检查以表名称为准。 + +## 最佳实践 + +1. 原子的覆盖写操作 + + 某些情况下,用户希望能够重写某张表的数据,但如果采用先删除再导入的方式进行,在中间会有一段时间无法查看数据。这时,用户可以先使用 `CREATE TABLE LIKE` 语句创建一个相同结构的新表,将新的数据导入到新表后,通过替换操作,原子的替换旧表,以达到目的。分区级别的原子覆盖写操作,请参阅 [临时分区文档](../partition/table-tmp-partition.md)。 diff --git a/docs/zh-CN/docs/summary/system-architecture.md b/docs/zh-CN/docs/summary/system-architecture.md deleted file mode 100644 index fc57b16dee..0000000000 --- a/docs/zh-CN/docs/summary/system-architecture.md +++ /dev/null @@ -1,29 +0,0 @@ ---- -{ - "title": "系统架构", - "language": "zh-CN" -} ---- - -<!-- -Licensed to the Apache Software Foundation (ASF) under one -or more contributor license agreements. See the NOTICE file -distributed with this work for additional information -regarding copyright ownership. The ASF licenses this file -to you under the Apache License, Version 2.0 (the -"License"); you may not use this file except in compliance -with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, -software distributed under the License is distributed on an -"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, either express or implied. See the License for the -specific language governing permissions and limitations -under the License. ---> - -# Doris系统架构 - -(TODO) \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org
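For reference, the replace-table document added in this commit (docs/zh-CN/docs/advanced/alter-table/replace-table.md) describes an atomic overwrite workflow: create a staging table with CREATE TABLE LIKE, load the new data into it, then swap it in with ALTER TABLE ... REPLACE WITH TABLE. The sketch below only illustrates that workflow; the database and table names (demo.sales, demo.sales_staging, demo.sales_source) are hypothetical, and the statement shapes follow the syntax block shown in the new doc.

```sql
-- Minimal sketch of the atomic-overwrite workflow described in the
-- replace-table doc added in this commit. All table names are hypothetical.

-- 1. Create a staging table with the same schema as the table to overwrite.
CREATE TABLE demo.sales_staging LIKE demo.sales;

-- 2. Load the fresh data into the staging table. Any load method works;
--    a plain INSERT ... SELECT is used here only to keep the sketch self-contained.
INSERT INTO demo.sales_staging
SELECT * FROM demo.sales_source;

-- 3. Atomically replace the old table with the staging table.
--    With 'swap' = 'false' the original demo.sales is dropped after the replace;
--    the default 'swap' = 'true' would instead keep its data under the name
--    demo.sales_staging, i.e. the two tables exchange data.
ALTER TABLE demo.sales REPLACE WITH TABLE sales_staging
PROPERTIES('swap' = 'false');
```

Because the replace is atomic, queries against demo.sales never observe a window where the table is missing or half-loaded, which is the point of preferring this over a drop-and-reload sequence.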