(doris) branch master updated: [performance](load) do not copy input_block in memtable (#36939)

2024-06-29 Thread liaoxin
This is an automated email from the ASF dual-hosted git repository.

liaoxin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new 288fef58c09 [performance](load) do not copy input_block in memtable 
(#36939)
288fef58c09 is described below

commit 288fef58c0944ea967e2d0a2b6d8ac5dcb524302
Author: Kaijie Chen 
AuthorDate: Sat Jun 29 16:06:35 2024 +0800

[performance](load) do not copy input_block in memtable (#36939)

Pass `column_offset` to `Block::clone_without_columns()` and `MutableBlock::add_rows()`,
so we do not need to copy `input_block` in `MemTable::insert()`.

To optimize performance in cases with many (e.g. 12k+) tablets:

Before:

```
-  DeltaWriterWriteTime:  1m58s
   -  MemTableWriterFlushTime:  0ns
   -  MemTableWriterLockTime:  1s41ms
   -  MemTableWriterShrinkTime:  0ns
   -  MemTableWriterWriteTime:  1m53s
  -  MemTableCopyCount:  12.044003M  (12044003)
  -  MemTableCopyTime:  42s857ms
  -  MemTableCopyTime0:  11s866ms
  -  MemTableInitTime:  13.384ms
  -  MemTableInsertTime:  51s543ms
```

After:

```
-  DeltaWriterWriteTime:  1m
   -  MemTableWriterFlushTime:  0ns
   -  MemTableWriterLockTime:  971.481ms
   -  MemTableWriterShrinkTime:  0ns
   -  MemTableWriterWriteTime:  55s274ms
  -  MemTableCopyCount:  0
  -  MemTableCopyTime:  0ns
  -  MemTableCopyTime0:  0ns
  -  MemTableInitTime:  18.746ms
  -  MemTableInsertTime:  53s772ms
```
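
The core idea is that the column projection described by `_column_offset` now
happens while rows are appended, so `MemTable::insert()` no longer has to
materialize a projected copy of `input_block` first. A toy sketch of that
pattern, with illustrative types only (not the actual Doris `Block` /
`MutableBlock` classes):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Toy stand-ins for the blocks: a "block" is a set of columns, each column a
// vector of ints. These names are illustrative, not the Doris types.
using Column = std::vector<int>;
struct Block { std::vector<Column> columns; };

// Append the selected rows of `src` into `dst`, reading the source columns
// through `column_offset` (dst column i <- src column column_offset[i]).
// The projection happens during add_rows instead of by copying the whole
// input block up front.
void add_rows(Block& dst, const Block& src, const std::vector<uint32_t>& row_idxs,
              const std::vector<int>& column_offset) {
    for (size_t i = 0; i < dst.columns.size(); ++i) {
        const Column& src_col = src.columns[column_offset[i]];
        for (uint32_t row : row_idxs) {
            dst.columns[i].push_back(src_col[row]);
        }
    }
}

int main() {
    Block input{{{1, 2, 3}, {10, 20, 30}, {100, 200, 300}}};
    Block memtable{{{}, {}}};             // destination keeps only two columns
    std::vector<int> column_offset{2, 0}; // dst col0 <- src col2, dst col1 <- src col0
    add_rows(memtable, input, /*row_idxs=*/{0, 2}, column_offset);
    std::cout << memtable.columns[0][1] << " " << memtable.columns[1][1] << "\n"; // 300 3
}
```

In the actual patch the same offset vector is also passed to
`Block::clone_without_columns()`, so the empty mutable blocks are built
directly with the projected schema.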
---
 be/src/olap/memtable.cpp  | 14 ++
 be/src/vec/core/block.cpp | 25 ++---
 be/src/vec/core/block.h   |  5 +++--
 3 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index 750026c289d..e55fd678bd2 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -180,16 +180,14 @@ int RowInBlockComparator::operator()(const RowInBlock* 
left, const RowInBlock* r
 
 Status MemTable::insert(const vectorized::Block* input_block,
 const std::vector& row_idxs) {
-vectorized::Block target_block = *input_block;
-target_block = input_block->copy_block(_column_offset);
 if (_is_first_insertion) {
 _is_first_insertion = false;
-auto cloneBlock = target_block.clone_without_columns();
+auto cloneBlock = input_block->clone_without_columns(&_column_offset);
 _input_mutable_block = 
vectorized::MutableBlock::build_mutable_block(&cloneBlock);
 _vec_row_comparator->set_block(&_input_mutable_block);
 _output_mutable_block = 
vectorized::MutableBlock::build_mutable_block(&cloneBlock);
 if (_keys_type != KeysType::DUP_KEYS) {
-_init_agg_functions(&target_block);
+_init_agg_functions(input_block);
 }
 if (_tablet_schema->has_sequence_col()) {
 if (_is_partial_update) {
@@ -210,11 +208,11 @@ Status MemTable::insert(const vectorized::Block* 
input_block,
 auto num_rows = row_idxs.size();
 size_t cursor_in_mutableblock = _input_mutable_block.rows();
 auto block_size0 = _input_mutable_block.allocated_bytes();
-RETURN_IF_ERROR(_input_mutable_block.add_rows(&target_block, 
row_idxs.data(),
-  row_idxs.data() + num_rows));
+RETURN_IF_ERROR(_input_mutable_block.add_rows(input_block, row_idxs.data(),
+  row_idxs.data() + num_rows, 
&_column_offset));
 auto block_size1 = _input_mutable_block.allocated_bytes();
 g_memtable_input_block_allocated_size << block_size1 - block_size0;
-auto input_size = size_t(target_block.bytes() * num_rows / 
target_block.rows() *
+auto input_size = size_t(input_block->bytes() * num_rows / 
input_block->rows() *
  config::memtable_insert_memory_ratio);
 _mem_usage += input_size;
 _insert_mem_tracker->consume(input_size);
@@ -348,7 +346,7 @@ Status MemTable::_sort_by_cluster_keys() {
 row_pos_vec.emplace_back(row_in_blocks[i]->_row_pos);
 }
 return _output_mutable_block.add_rows(&in_block, row_pos_vec.data(),
-  row_pos_vec.data() + 
in_block.rows());
+  row_pos_vec.data() + 
in_block.rows(), &_column_offset);
 }
 
 void MemTable::_sort_one_column(std::vector& row_in_blocks, Tie& 
tie,
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 676674e8ec0..22062dc5310 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -653,12 +653,19 @@ Block Block::clone_with_columns(const Columns& columns) 
const {
 return res;
 }
 
-Block Block::clone_without_columns() const {
+Block Block::clone_without_columns(const std::vector* 

Error while running notifications feature from refs/heads/master:.asf.yaml in doris-website!

2024-06-29 Thread Apache Infrastructure


An error occurred while running notifications feature in .asf.yaml!:
Invalid notification target 'comm...@foo.apache.org'. Must be a valid 
@doris.apache.org list!


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris-website) branch master updated: Update term “湖仓一体” / “Data Lakehouse” in docs (#804)

2024-06-29 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git


The following commit(s) were added to refs/heads/master by this push:
 new 53b92c92a7 Update term “湖仓一体” / “Data Lakehouse” in docs (#804)
53b92c92a7 is described below

commit 53b92c92a71bfcac14297e327c244d529a31c980
Author: lishiqi_amy 
AuthorDate: Sat Jun 29 18:20:42 2024 +0800

Update term “湖仓一体” / “Data Lakehouse” in docs (#804)

Co-authored-by: Luzhijing <82810928+luzhij...@users.noreply.github.com>
---
 docs/faq/lakehouse-faq.md|  2 +-
 docs/get-starting/what-is-apache-doris.md|  2 +-
 docs/lakehouse/lakehouse-overview.md |  4 ++--
 docs/releasenotes/release-2.1.4.md   |  4 ++--
 .../current/get-starting/what-is-apache-doris.md |  4 ++--
 .../current/releasenotes/release-2.0.0.md|  4 ++--
 .../version-1.2/summary/basic-summary.md | 20 ++--
 .../version-2.0/get-starting/what-is-apache-doris.md |  2 +-
 .../version-2.0/releasenotes/release-2.0.0.md| 14 +++---
 .../version-2.1/get-starting/what-is-apache-doris.md |  2 +-
 10 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/docs/faq/lakehouse-faq.md b/docs/faq/lakehouse-faq.md
index a0fbdbd0cd..951c1b9daf 100644
--- a/docs/faq/lakehouse-faq.md
+++ b/docs/faq/lakehouse-faq.md
@@ -1,6 +1,6 @@
 ---
 {
-"title": "Lakehouse FAQ",
+"title": "Data Lakehouse FAQ",
 "language": "en"
 }
 ---
diff --git a/docs/get-starting/what-is-apache-doris.md 
b/docs/get-starting/what-is-apache-doris.md
index 006f6becbd..43550398a5 100644
--- a/docs/get-starting/what-is-apache-doris.md
+++ b/docs/get-starting/what-is-apache-doris.md
@@ -40,7 +40,7 @@ The figure below shows what Apache Doris can do in a data 
pipeline. Data sources
   - Reports for internal analysts and managers
   - Customer-facing reports: such as site analysis for website owners and 
advertising reports for advertisers. Such cases typically require high 
concurrency (thousands of QPS) and low query latency (measured in 
milliseconds). For example, the e-commerce giant JD.com uses Apache Doris for 
ad reporting. It ingests 10 billion rows of data per day and achieves over 
10,000 QPS and P99 latency of 150ms.
 - **Ad-hoc query**: analyst-facing self-service analytics with irregular query 
patterns and high throughput requirements. For example, Xiaomi builds a Growth 
Analytics platform based on Apache Doris. Handling 10,000s of SQL queries every 
day, it delivers an average query latency of 10 seconds and a P95 latency of 30 
seconds. 
-- **Lakehouse**: Apache Doris allows federated queries on external tables in 
offline data lakehouses such as Hive, Hudi, and Iceberg and achieves 
outstanding query performance by avoiding data copying.
+- **Data Lakehouse**: Apache Doris allows federated queries on external tables 
in offline data lakehouses such as Hive, Hudi, and Iceberg and achieves 
outstanding query performance by avoiding data copying.
 - **Log analysis**: Apache Doris supports inverted index and full-text search 
since version 2.0. Relying on its highly efficient query and storage engines, 
Apache Doris enables 10 times higher cost-effectiveness than common log 
analytic solutions.
 - **Unified data warehouse**: Apache Doris can work as a unified data 
processing platform for various analytic workloads, saving users from handling 
complicated data components and tech stacks. For example, Haidilao, a 
world-renowned chain restaurant, replaces its old architecture consisting of 
Spark, Hive, Kudu, HBase, and Phoenix with Apache Doris.
 
diff --git a/docs/lakehouse/lakehouse-overview.md 
b/docs/lakehouse/lakehouse-overview.md
index 96fdcc3d9b..f4c2bea70e 100644
--- a/docs/lakehouse/lakehouse-overview.md
+++ b/docs/lakehouse/lakehouse-overview.md
@@ -1,6 +1,6 @@
 ---
 {
-"title": "Lakehouse Overview",
+"title": "Data Lakehouse Overview",
 "language": "en"
 }
 ---
@@ -36,7 +36,7 @@ Data warehouses addresses the need for fast data analysis, 
while data lakes are
 
 ## Applicable scenarios
 
-We design the Doris lakehouse solution for the following four applicable 
scenarios:
+We design the Doris Data Lakehouse solution for the following four applicable 
scenarios:
 
 - Lakehouse query acceleration: As a highly efficient OLAP query engine, Doris 
has excellent MPP-based vectorized distributed query capabilities. Data lake 
analysis with Doris will benefit from the efficient query engine.
 
diff --git a/docs/releasenotes/release-2.1.4.md 
b/docs/releasenotes/release-2.1.4.md
index d04a8be0cf..3534e2008f 100644
--- a/docs/releasenotes/release-2.1.4.md
+++ b/docs/releasenotes/release-2.1.4.md
@@ -70,7 +70,7 @@ under the License.
 
 - The new optimizer fully supports point query functionality 
[#36205](https://github.com/apache/doris/pull/36205).
 
-


(doris-website) branch master updated: [doc] Remove contents related to the MySQL Load function in 1.2 version (#803)

2024-06-29 Thread luzhijing
This is an automated email from the ASF dual-hosted git repository.

luzhijing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git


The following commit(s) were added to refs/heads/master by this push:
 new 97b91a0d18 [doc] Remove contents related to the MySQL Load function in 
1.2 version (#803)
97b91a0d18 is described below

commit 97b91a0d188cd5ee601410244db2dc3c966a170d
Author: lishiqi_amy 
AuthorDate: Sat Jun 29 18:28:43 2024 +0800

[doc] Remove contents related to the MySQL Load function in 1.2 version 
(#803)
---
 .../import/import-scenes/local-file-load.md|  64 +
 .../import/import-way/mysql-load-manual.md | 155 -
 .../version-1.2/data-operate/import/load-manual.md |  34 +++--
 .../import/import-scenes/local-file-load.md|  59 +---
 .../import/import-way/mysql-load-manual.md | 152 
 .../version-1.2/data-operate/import/load-manual.md |   1 -
 versioned_sidebars/version-1.2-sidebars.json   |   1 -
 7 files changed, 27 insertions(+), 439 deletions(-)

diff --git 
a/i18n/zh-CN/docusaurus-plugin-content-docs/version-1.2/data-operate/import/import-scenes/local-file-load.md
 
b/i18n/zh-CN/docusaurus-plugin-content-docs/version-1.2/data-operate/import/import-scenes/local-file-load.md
index be023fe354..83eaf9f3bf 100644
--- 
a/i18n/zh-CN/docusaurus-plugin-content-docs/version-1.2/data-operate/import/import-scenes/local-file-load.md
+++ 
b/i18n/zh-CN/docusaurus-plugin-content-docs/version-1.2/data-operate/import/import-scenes/local-file-load.md
@@ -28,11 +28,10 @@ under the License.
 # 导入本地数据
 本文档主要介绍如何从客户端导入本地的数据。
 
-目前Doris支持两种从本地导入数据的模式:
-1. [Stream Load](../import-way/stream-load-manual.md)
-2. [MySql Load](../import-way/mysql-load-manual.md)
+目前 Doris 支持从本地导入数据的模式为 [Stream 
Load](../import-way/stream-load-manual.md)。下文介绍了使用 Stream Load 导入本地数据的方法。
+
+## 使用 Stream Load 导入本地数据
 
-## Stream Load
 Stream Load 用于将本地文件导入到 Doris 中。
 
 不同于其他命令的提交方式,Stream Load 是通过 HTTP 协议与 Doris 进行连接交互的。
@@ -44,7 +43,7 @@ Stream Load 用于将本地文件导入到 Doris 中。
 
 本文文档我们以 [curl](https://curl.se/docs/manpage.html) 命令为例演示如何进行数据导入。
 
-文档最后,我们给出一个使用 Java 导入数据的代码示例
+文档最后,我们给出一个使用 Java 导入数据的代码示例。
 
 ### 导入数据
 
@@ -78,14 +77,14 @@ PUT /api/{db}/{table}/_stream_load
```
 
- user:passwd 为在 Doris 中创建的用户。初始用户为 admin / root,密码初始状态下为空。
-   - host:port 为 BE 的 HTTP 协议端口,默认是 8040,可以在 Doris 集群 WEB UI页面查看。
+   - host:port 为 BE 的 HTTP 协议端口,默认是 8040,可以在 Doris 集群 WEB UI 页面查看。
- label: 可以在 Header 中指定 Label 唯一标识这个导入任务。
 
关于 Stream Load 命令的更多高级操作,请参阅 [Stream 
Load](../../../sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD.md)
 命令文档。
 
 3. 等待导入结果
 
-   Stream Load 命令是同步命令,返回成功即表示导入成功。如果导入数据较大,可能需要较长的等待时间。示例如下:
+   Stream Load 命令是同步命令,返回成功即表示导入成功。如果导入数据较大,可能需要较长的等待时间。示例如下:
 
```json
{
@@ -217,7 +216,7 @@ public class DorisStreamLoader {
 
 
 
->注意:这里 http client 的版本要是4.5.13
+>注意:这里 http client 的版本要是 4.5.13
 > ```xml
 >
 >org.apache.httpcomponents
@@ -225,52 +224,3 @@ public class DorisStreamLoader {
 >4.5.13
 >
 > ```
-
-## MySql LOAD
-
-MySql LOAD样例
-
-
-### 导入数据
-1. 创建一张表
-
-   通过 `CREATE TABLE` 命令在`demo`创建一张表用于存储待导入的数据
-
-   ```sql
-   CREATE TABLE IF NOT EXISTS load_local_file_test
-   (
-   id INT,
-   age TINYINT,
-   name VARCHAR(50)
-   )
-   unique key(id)
-   DISTRIBUTED BY HASH(id) BUCKETS 3;
-   ```
-
-2. 导入数据
-   在MySql客户端下执行以下 SQL 命令导入本地文件:
-
-   ```sql
-   LOAD DATA
-   LOCAL
-   INFILE '/path/to/local/demo.txt'
-   INTO TABLE demo.load_local_file_test
-   ```
-
-   关于 MySQL Load 命令的更多高级操作,请参阅 [MySQL 
Load](../../../sql-manual/sql-reference/Data-Manipulation-Statements/Load/MYSQL-LOAD.md)
 命令文档。
-
-3. 等待导入结果
-
-   MySql Load 命令是同步命令,返回成功即表示导入成功。如果导入数据较大,可能需要较长的等待时间。示例如下:
-
-   ```text
-   Query OK, 1 row affected (0.17 sec)
-   Records: 1  Deleted: 0  Skipped: 0  Warnings: 0
-   ```
-
-   - 如果出现上述结果, 则表示导入成功。导入失败, 会抛出错误,并在客户端显示错误原因
-   - 其他字段的详细介绍,请参阅 [MySQL 
Load](../../../sql-manual/sql-reference/Data-Manipulation-Statements/Load/MYSQL-LOAD.md)
 命令文档。
-
-### 导入建议
-- MySql Load 只能导入本地文件(可以是客户端本地或者连接的FE节点本地), 而且支持CSV格式。
-- 建议一个导入请求的数据量控制在 1 - 2 GB 以内。如果有大量本地文件,可以分批并发提交。
diff --git 
a/i18n/zh-CN/docusaurus-plugin-content-docs/version-1.2/data-operate/import/import-way/mysql-load-manual.md
 
b/i18n/zh-CN/docusaurus-plugin-content-docs/version-1.2/data-operate/import/import-way/mysql-load-manual.md
deleted file mode 100644
index 8c316d599f..00
--- 
a/i18n/zh-CN/docusaurus-plugin-content-docs/version-1.2/data-operate/import/import-way/mysql-load-manual.md
+++ /dev/null
@@ -1,155 +0,0 @@

-{
-"title": "MySql load",
-"language": "zh-CN"
-}

-
-
-
-# MySql load
-
-
-该语句兼容MySQL标准的[LOAD 
DATA](https://dev.mysql.com/doc/refman/8.0/en/load-data.html)语法,方便用户导入本地数据,并降低学习成本。
-
-MySql load 同步执行导入并返回导入结果。用户可直接通过SQL返回信息判断本次导入是否成功。
-
-MySql load 主要适用于导入客户端本地文件,或通过程序导入数据流中的数据。
-
-
-
-## 基本原理
-
-MySql Load和Stream Load功能相似, 都是导入本地文件到Doris集群中, 因此

(doris) branch branch-2.1 updated: [branch-2.1][improvement](mysql catalog) disable mysql AbandonedConnectionCleanupThread (#36970)

2024-06-29 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new e25b0d7c378 [branch-2.1][improvement](mysql catalog) disable mysql 
AbandonedConnectionCleanupThread  (#36970)
e25b0d7c378 is described below

commit e25b0d7c378a3287d5bd5ca173d3448a8d09abab
Author: zy-kkk 
AuthorDate: Sat Jun 29 18:35:41 2024 +0800

[branch-2.1][improvement](mysql catalog) disable mysql 
AbandonedConnectionCleanupThread  (#36970)

pick (#36655)
---
 .../src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java  | 1 +
 .../java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java   | 2 ++
 2 files changed, 3 insertions(+)

diff --git 
a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java
 
b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java
index 6a7e2affd58..5cdd30a9751 100644
--- 
a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java
+++ 
b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java
@@ -48,6 +48,7 @@ public class MySQLJdbcExecutor extends BaseJdbcExecutor {
 
 public MySQLJdbcExecutor(byte[] thriftParams) throws Exception {
 super(thriftParams);
+System.setProperty("com.mysql.cj.disableAbandonedConnectionCleanup", 
"true");
 }
 
 @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java
index efb69d8003f..f58753bccee 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java
@@ -45,6 +45,8 @@ public class JdbcMySQLClient extends JdbcClient {
 
 protected JdbcMySQLClient(JdbcClientConfig jdbcClientConfig) {
 super(jdbcClientConfig);
+// Disable abandoned connection cleanup
+System.setProperty("com.mysql.cj.disableAbandonedConnectionCleanup", 
"true");
 convertDateToNull = isConvertDatetimeToNull(jdbcClientConfig);
 Connection conn = null;
 Statement stmt = null;


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org





(doris-website) branch master updated: [lakehouse] modify the title of docs (#806)

2024-06-29 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git


The following commit(s) were added to refs/heads/master by this push:
 new 618ebdb2a0 [lakehouse] modify the title of docs (#806)
618ebdb2a0 is described below

commit 618ebdb2a0c5f1114aa69300afa656cac9ce1315
Author: Mingyu Chen 
AuthorDate: Sat Jun 29 19:48:53 2024 +0800

[lakehouse] modify the title of docs (#806)
---
 docs/lakehouse/datalake-analytics/hive.md  |  4 +---
 docs/lakehouse/datalake-analytics/hudi.md  |  4 +---
 docs/lakehouse/datalake-analytics/iceberg.md   |  4 +---
 docs/lakehouse/datalake-analytics/lakesoul.md  |  4 ++--
 docs/lakehouse/datalake-analytics/paimon.md|  4 +---
 docs/lakehouse/file.md |  2 +-
 i18n/zh-CN/docusaurus-plugin-content-docs/current.json | 14 +++---
 .../current/lakehouse/datalake-analytics/hive.md   |  4 +---
 .../current/lakehouse/datalake-analytics/hudi.md   |  5 +
 .../current/lakehouse/datalake-analytics/iceberg.md|  5 +
 .../current/lakehouse/datalake-analytics/lakesoul.md   |  4 ++--
 .../current/lakehouse/datalake-analytics/paimon.md |  4 +---
 .../current/lakehouse/file.md  |  2 +-
 i18n/zh-CN/docusaurus-plugin-content-docs/version-2.0.json | 10 +-
 .../version-2.0/lakehouse/datalake/hive.md |  2 +-
 .../version-2.0/lakehouse/datalake/hudi.md |  5 +
 .../version-2.0/lakehouse/datalake/iceberg.md  |  5 +
 .../version-2.0/lakehouse/datalake/paimon.md   |  6 ++
 .../version-2.0/lakehouse/file.md  |  4 +---
 i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1.json | 14 +++---
 .../version-2.1/lakehouse/datalake-analytics/hive.md   |  4 +---
 .../version-2.1/lakehouse/datalake-analytics/hudi.md   |  5 +
 .../version-2.1/lakehouse/datalake-analytics/iceberg.md|  5 +
 .../version-2.1/lakehouse/datalake-analytics/paimon.md |  5 ++---
 .../version-2.1/lakehouse/file.md  |  2 +-
 sidebars.json  |  8 
 versioned_docs/version-2.0/lakehouse/datalake/hive.md  |  4 +---
 versioned_docs/version-2.0/lakehouse/datalake/hudi.md  |  5 +
 versioned_docs/version-2.0/lakehouse/datalake/iceberg.md   |  5 +
 versioned_docs/version-2.0/lakehouse/datalake/paimon.md|  7 ++-
 versioned_docs/version-2.0/lakehouse/file.md   |  6 ++
 .../version-2.1/lakehouse/datalake-analytics/hive.md   |  4 +---
 .../version-2.1/lakehouse/datalake-analytics/hudi.md   |  4 +---
 .../version-2.1/lakehouse/datalake-analytics/iceberg.md|  5 +
 .../version-2.1/lakehouse/datalake-analytics/paimon.md |  6 ++
 versioned_docs/version-2.1/lakehouse/file.md   |  2 +-
 versioned_sidebars/version-2.0-sidebars.json   |  4 ++--
 versioned_sidebars/version-2.1-sidebars.json   |  8 
 38 files changed, 68 insertions(+), 127 deletions(-)

diff --git a/docs/lakehouse/datalake-analytics/hive.md 
b/docs/lakehouse/datalake-analytics/hive.md
index 900b800df8..ab0830d830 100644
--- a/docs/lakehouse/datalake-analytics/hive.md
+++ b/docs/lakehouse/datalake-analytics/hive.md
@@ -1,6 +1,6 @@
 ---
 {
-"title": "Hive",
+"title": "Hive Catalog",
 "language": "en"
 }
 ---
@@ -24,8 +24,6 @@ specific language governing permissions and limitations
 under the License.
 -->
 
-# Hive
-
 By connecting to Hive Metastore, or a metadata service compatible with Hive 
Metatore, Doris can automatically obtain Hive database table information and 
perform data queries.
 
 In addition to Hive, many other systems also use the Hive Metastore to store 
metadata. So through Hive Catalog, we can not only access Hive, but also access 
systems that use Hive Metastore as metadata storage. Such as Iceberg, Hudi, etc.
diff --git a/docs/lakehouse/datalake-analytics/hudi.md 
b/docs/lakehouse/datalake-analytics/hudi.md
index e201c11e07..c986710d49 100644
--- a/docs/lakehouse/datalake-analytics/hudi.md
+++ b/docs/lakehouse/datalake-analytics/hudi.md
@@ -1,6 +1,6 @@
 ---
 {
-"title": "Hudi",
+"title": "Hudi Catalog",
 "language": "en"
 }
 ---
@@ -25,8 +25,6 @@ under the License.
 -->
 
 
-# Hudi
-
 ## Usage
 
 1. The query types supported by the Hudi table are as follows, and the 
Incremental Query will be supported in the future.
diff --git a/docs/lakehouse/datalake-analytics/iceberg.md 
b/docs/lakehouse/datalake-analytics/iceberg.md
index 48d3da31d7..01f62739a2 100644
--- a/docs/lakehouse/datalake-analytics/iceberg.md
+++ b/docs/lakehouse/datalake-analytics/iceberg.md
@@ -1,6 +1,6 @@
 ---
 {
-"title": "Iceberg",
+"title": "Iceberg Catalog",
  

(doris-thirdparty) branch clucene-2.0 updated: [opt](position) add position iterator interface (#229)

2024-06-29 Thread kxiao
This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch clucene-2.0
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene-2.0 by this push:
 new d83b162869 [opt](position) add position iterator interface (#229)
d83b162869 is described below

commit d83b1628694a22c7bc7e12e0d8511363b1b770bd
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Sat Jun 29 23:32:00 2024 +0800

[opt](position) add position iterator interface (#229)
---
 src/core/CLucene/index/SegmentTermDocs.cpp |  4 +--
 src/core/CLucene/index/_SegmentHeader.h|  4 +--
 src/core/CLucene/search/query/DcoIdSetIterator.h   | 16 
 src/core/CLucene/search/query/TermIterator.h   | 29 --
 .../CLucene/search/query/TermPositionIterator.h| 23 +
 src/core/CMakeLists.txt|  2 +-
 6 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/src/core/CLucene/index/SegmentTermDocs.cpp 
b/src/core/CLucene/index/SegmentTermDocs.cpp
index 9108f1dfd5..e346dc0ca2 100644
--- a/src/core/CLucene/index/SegmentTermDocs.cpp
+++ b/src/core/CLucene/index/SegmentTermDocs.cpp
@@ -19,7 +19,7 @@
 CL_NS_DEF(index)
 
 SegmentTermDocs::SegmentTermDocs(const SegmentReader *_parent) : 
parent(_parent), freqStream(_parent->freqStream->clone()),
- count(0), 
df(0), deletedDocs(_parent->deletedDocs), _doc(0), _freq(0), 
skipInterval(_parent->tis->getSkipInterval()),
+ count(0), 
df(0), deletedDocs(_parent->deletedDocs), _doc(-1), _freq(0), 
skipInterval(_parent->tis->getSkipInterval()),
  
maxSkipLevels(_parent->tis->getMaxSkipLevels()), skipListReader(NULL), 
freqBasePointer(0), proxBasePointer(0),
  
skipPointer(0), haveSkipped(false), pointer(0), pointerMax(0), 
indexVersion_(_parent->_fieldInfos->getIndexVersion()),
  
hasProx(_parent->_fieldInfos->hasProx()), buffer_(freqStream, hasProx, 
indexVersion_) {
@@ -73,7 +73,7 @@ void SegmentTermDocs::seek(const TermInfo *ti, Term *term) {
 df = 0;
 } else {// punt case
 df = ti->docFreq;
-_doc = 0;
+_doc = -1;
 freqBasePointer = ti->freqPointer;
 proxBasePointer = ti->proxPointer;
 skipPointer = freqBasePointer + ti->skipOffset;
diff --git a/src/core/CLucene/index/_SegmentHeader.h 
b/src/core/CLucene/index/_SegmentHeader.h
index bf988a2f27..c1f01e7cec 100644
--- a/src/core/CLucene/index/_SegmentHeader.h
+++ b/src/core/CLucene/index/_SegmentHeader.h
@@ -93,8 +93,8 @@ protected:
   int32_t count;
   int32_t df;
   CL_NS(util)::BitSet* deletedDocs;
-  int32_t _doc;
-  int32_t _freq;
+  int32_t _doc = -1;
+  int32_t _freq = 0;
   int32_t docs[PFOR_BLOCK_SIZE]; // buffered doc numbers
   int32_t freqs[PFOR_BLOCK_SIZE];// buffered term freqs
   int32_t pointer;
diff --git a/src/core/CLucene/search/query/DcoIdSetIterator.h 
b/src/core/CLucene/search/query/DcoIdSetIterator.h
deleted file mode 100644
index 88aa431357..00
--- a/src/core/CLucene/search/query/DcoIdSetIterator.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#pragma once
-
-#include "CLucene/index/DocRange.h"
-
-class DocIdSetIterator {
-public:
-  DocIdSetIterator() = default;
-  virtual ~DocIdSetIterator() = default;
-
-  virtual int32_t docID() = 0;
-  virtual int32_t nextDoc() = 0;
-  virtual int32_t advance(int32_t target) = 0;
-
-  virtual int32_t docFreq() const = 0;
-  virtual bool readRange(DocRange* docRange) const = 0;
-};
\ No newline at end of file
diff --git a/src/core/CLucene/search/query/TermIterator.h 
b/src/core/CLucene/search/query/TermIterator.h
index e0cf23a4fb..3eb22a254d 100644
--- a/src/core/CLucene/search/query/TermIterator.h
+++ b/src/core/CLucene/search/query/TermIterator.h
@@ -1,51 +1,54 @@
 #pragma once
 
-#include "CLucene/search/query/DcoIdSetIterator.h"
 #include "CLucene/index/Terms.h"
 
 #include 
+#include 
 
 CL_NS_USE(index)
 
-class TermIterator : public DocIdSetIterator {
+class TermIterator {
 public:
   TermIterator() = default;
-  TermIterator(TermDocs* termDocs) : termDocs_(termDocs) {
+  TermIterator(TermDocs* termDocs) 
+: termDocs_(termDocs) {
   }
 
-  virtual ~TermIterator() = default;
-
-  bool isEmpty() {
+  inline bool isEmpty() const {
 return termDocs_ == nullptr;
   }
 
-  int32_t docID() override {
-uint32_t docId = termDocs_->doc();
+  inline int32_t docID() const {
+int32_t docId = termDocs_->doc();
 return docId >= INT_MAX ? INT_MAX : docId;
   }
 
-  int32_t nextDoc() override {
+  inline int32_t freq() const {
+return termDocs_->freq();
+  }
+
+  inline int32_t nextDo

(doris-website) branch asf-site updated (befa8feeb6 -> e4f7477f0e)

2024-06-29 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch asf-site
in repository https://gitbox.apache.org/repos/asf/doris-website.git


 discard befa8feeb6 Automated deployment with doris branch master@ 
618ebdb2a0c5f1114aa69300afa656cac9ce1315
 new e4f7477f0e Automated deployment with doris branch @ 
618ebdb2a0c5f1114aa69300afa656cac9ce1315

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (befa8feeb6)
\
 N -- N -- N   refs/heads/asf-site (e4f7477f0e)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docs/1.2/search-index.json   | 2 +-
 docs/2.0/search-index.json   | 2 +-
 docs/dev/search-index.json   | 2 +-
 search-index.json| 2 +-
 zh-CN/docs/1.2/search-index.json | 2 +-
 zh-CN/docs/2.0/search-index.json | 2 +-
 zh-CN/docs/dev/search-index.json | 2 +-
 zh-CN/search-index.json  | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch master updated: Revert "[enhancement](compaction) optimizing memory usage for compaction (#36492)" (#37032)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new 4e165dc7ce1 Revert "[enhancement](compaction) optimizing memory usage 
for compaction (#36492)" (#37032)
4e165dc7ce1 is described below

commit 4e165dc7ce15f17e0c72ae5ea0c1caf29bdfc157
Author: Yongqiang YANG <98214048+dataroar...@users.noreply.github.com>
AuthorDate: Sun Jun 30 11:08:22 2024 +0800

Revert "[enhancement](compaction) optimizing memory usage for compaction 
(#36492)" (#37032)

This reverts commit 99901814d8b90887f54b1768b98b4f0b78fab376.
---
 be/src/cloud/cloud_base_compaction.cpp |  10 --
 be/src/cloud/cloud_cumulative_compaction.cpp   |  13 +-
 be/src/common/config.cpp   |   6 -
 be/src/common/config.h |   6 -
 be/src/olap/base_compaction.cpp|  10 --
 be/src/olap/base_tablet.h  |   5 -
 be/src/olap/compaction.cpp |  15 +--
 be/src/olap/compaction.h   |   2 -
 be/src/olap/cumulative_compaction.cpp  |  15 +--
 be/src/olap/iterators.h|  15 +--
 be/src/olap/merger.cpp |  67 +-
 be/src/olap/merger.h   |   6 +-
 be/src/olap/rowset/rowset_meta.h   |  15 ---
 be/src/olap/rowset/segcompaction.cpp   |   2 +-
 be/src/olap/tablet_reader.h|   2 -
 be/src/vec/olap/vertical_block_reader.cpp  |  18 +--
 be/src/vec/olap/vertical_block_reader.h|   3 +-
 be/src/vec/olap/vertical_merge_iterator.cpp|  29 ++---
 be/src/vec/olap/vertical_merge_iterator.h  |  25 +---
 be/test/olap/base_compaction_test.cpp  |  84 -
 be/test/olap/rowid_conversion_test.cpp |   6 +-
 be/test/vec/olap/vertical_compaction_test.cpp  |  14 +--
 .../compaction_width_array_column.groovy   | 137 -
 23 files changed, 42 insertions(+), 463 deletions(-)

diff --git a/be/src/cloud/cloud_base_compaction.cpp 
b/be/src/cloud/cloud_base_compaction.cpp
index 4ceab8eb6e3..d4a86743a48 100644
--- a/be/src/cloud/cloud_base_compaction.cpp
+++ b/be/src/cloud/cloud_base_compaction.cpp
@@ -163,16 +163,6 @@ Status CloudBaseCompaction::pick_rowsets_to_compact() {
 return Status::Error("no suitable versions for 
compaction");
 }
 
-int score = 0;
-int rowset_cnt = 0;
-while (rowset_cnt < _input_rowsets.size()) {
-score += 
_input_rowsets[rowset_cnt++]->rowset_meta()->get_compaction_score();
-if (score > config::base_compaction_max_compaction_score) {
-break;
-}
-}
-_input_rowsets.resize(rowset_cnt);
-
 // 1. cumulative rowset must reach base_compaction_min_rowset_num threshold
 if (_input_rowsets.size() > config::base_compaction_min_rowset_num) {
 VLOG_NOTICE << "satisfy the base compaction policy. tablet=" << 
_tablet->tablet_id()
diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp 
b/be/src/cloud/cloud_cumulative_compaction.cpp
index 2a26b1b294b..de318f979a5 100644
--- a/be/src/cloud/cloud_cumulative_compaction.cpp
+++ b/be/src/cloud/cloud_cumulative_compaction.cpp
@@ -354,20 +354,11 @@ Status 
CloudCumulativeCompaction::pick_rowsets_to_compact() {
 return st;
 }
 
-int64_t max_score = config::cumulative_compaction_max_deltas;
-auto process_memory_usage = 
doris::GlobalMemoryArbitrator::process_memory_usage();
-bool memory_usage_high = process_memory_usage > MemInfo::soft_mem_limit() 
* 0.8;
-if 
(cloud_tablet()->last_compaction_status.is() ||
-memory_usage_high) {
-max_score = std::max(config::cumulative_compaction_max_deltas /
- 
config::cumulative_compaction_max_deltas_factor,
- config::cumulative_compaction_min_deltas + 1);
-}
-
 size_t compaction_score = 0;
 auto compaction_policy = 
cloud_tablet()->tablet_meta()->compaction_policy();
 _engine.cumu_compaction_policy(compaction_policy)
-->pick_input_rowsets(cloud_tablet(), candidate_rowsets, max_score,
+->pick_input_rowsets(cloud_tablet(), candidate_rowsets,
+ config::cumulative_compaction_max_deltas,
  config::cumulative_compaction_min_deltas, 
&_input_rowsets,
  &_last_delete_version, &compaction_score);
 
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 3e948f4cca2..580793d36ab 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -385,7 +385,6 @@ DEFINE_mInt32(max_single_replica_compaction_threads, "-1");
 
 DEFINE_Bool(enable_base_compaction_idle_sched, "true"

(doris) branch branch-2.1 updated: [fix](map)fix upgrade behavior from 1.2 version #36635 (#36983)

2024-06-29 Thread kxiao
This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new 021ed1d6905 [fix](map)fix upgrade behavior from 1.2 version #36635 
(#36983)
021ed1d6905 is described below

commit 021ed1d6905a12488d44492159b87b149c8f42a8
Author: amory 
AuthorDate: Sun Jun 30 12:11:03 2024 +0800

[fix](map)fix upgrade behavior from 1.2 version #36635 (#36983)
---
 be/src/olap/rowset/segment_v2/column_reader.cpp | 1 +
 be/src/olap/rowset/segment_v2/column_writer.cpp | 5 +
 be/src/olap/tablet_schema.cpp   | 6 --
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 24b755d70cd..f6f131f9ad9 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -144,6 +144,7 @@ Status ColumnReader::create(const ColumnReaderOptions& 
opts, const ColumnMetaPB&
 }
 case FieldType::OLAP_FIELD_TYPE_MAP: {
 // map reader now has 3 sub readers for key, value, 
offsets(scalar), null(scala)
+DCHECK(meta.children_columns_size() == 3 || 
meta.children_columns_size() == 4);
 std::unique_ptr key_reader;
 RETURN_IF_ERROR(ColumnReader::create(opts, 
meta.children_columns(0),
  
meta.children_columns(0).num_rows(), file_reader,
diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp 
b/be/src/olap/rowset/segment_v2/column_writer.cpp
index dee0d520d1f..e9315f7a220 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -260,6 +260,11 @@ Status ColumnWriter::create(const ColumnWriterOptions& 
opts, const TabletColumn*
 }
 case FieldType::OLAP_FIELD_TYPE_MAP: {
 DCHECK(column->get_subtype_count() == 2);
+if (column->get_subtype_count() < 2) {
+return Status::InternalError(
+"If you upgraded from version 1.2.*, please DROP the 
MAP columns and then "
+"ADD the MAP columns back.");
+}
 // create key & value writer
 std::vector> inner_writer_list;
 for (int i = 0; i < 2; ++i) {
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 290e5a6bc25..0900c6d8d40 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -550,7 +550,8 @@ void TabletColumn::init_from_pb(const ColumnPB& column) {
 CHECK(column.children_columns_size() == 1) << "ARRAY type has more 
than 1 children types.";
 }
 if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
-CHECK(column.children_columns_size() == 2) << "MAP type has more than 
2 children types.";
+DCHECK(column.children_columns_size() == 2) << "MAP type has more than 
2 children types.";
+LOG(WARNING) << "MAP type has more than 2 children types.";
 }
 for (size_t i = 0; i < column.children_columns_size(); i++) {
 TabletColumn child_column;
@@ -615,7 +616,8 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const {
 CHECK(_sub_columns.size() == 1) << "ARRAY type has more than 1 
children types.";
 }
 if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
-CHECK(_sub_columns.size() == 2) << "MAP type has more than 2 children 
types.";
+DCHECK(_sub_columns.size() == 2) << "MAP type has more than 2 children 
types.";
+LOG(WARNING) << "MAP type has more than 2 children types.";
 }
 
 for (size_t i = 0; i < _sub_columns.size(); i++) {


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch branch-2.1 updated (021ed1d6905 -> d237a4d303e)

2024-06-29 Thread kxiao
This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a change to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


from 021ed1d6905 [fix](map)fix upgrade behavior from 1.2 version #36635 
(#36983)
 add d237a4d303e [fix](array)fix array_except/union for left const return 
only one row result #36776 (#36986)

No new revisions were added by this update.

Summary of changes:
 be/src/vec/functions/array/function_array_set.h  |  6 +-
 .../sql_functions/array_functions/test_array_functions.out   | 12 
 .../array_functions/test_array_functions.groovy  |  3 +++
 3 files changed, 20 insertions(+), 1 deletion(-)


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris-website) branch asf-site updated (e4f7477f0e -> 2ce1141979)

2024-06-29 Thread github-bot
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch asf-site
in repository https://gitbox.apache.org/repos/asf/doris-website.git


 discard e4f7477f0e Automated deployment with doris branch @ 
618ebdb2a0c5f1114aa69300afa656cac9ce1315
 new 2ce1141979 Automated deployment with doris branch @ 
618ebdb2a0c5f1114aa69300afa656cac9ce1315

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (e4f7477f0e)
\
 N -- N -- N   refs/heads/asf-site (2ce1141979)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docs/1.2/search-index.json   | 2 +-
 docs/2.0/search-index.json   | 2 +-
 docs/dev/search-index.json   | 2 +-
 search-index.json| 2 +-
 zh-CN/docs/1.2/search-index.json | 2 +-
 zh-CN/docs/2.0/search-index.json | 2 +-
 zh-CN/docs/dev/search-index.json | 2 +-
 zh-CN/search-index.json  | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch master updated: [fix](cgroup memory) Correct cgroup mem info cache (#36966)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new b61a1a8ae7b [fix](cgroup memory) Correct cgroup mem info cache (#36966)
b61a1a8ae7b is described below

commit b61a1a8ae7b1bdffdd54fc14b9cfef0c6abed0a4
Author: Hongkun Xu 
AuthorDate: Sun Jun 30 12:56:14 2024 +0800

[fix](cgroup memory) Correct cgroup mem info cache (#36966)

## Proposed changes
After upgrading to Doris 2.1.3, we noticed that the "sys available
memory" reported in be.INFO continuously decreases until it falls below
the warning watermark, leading to persistent garbage collection (GC)
even though actual memory usage is very low, and the cache reported in
cgroup mem info is always 0. Consequently, I identified an error in the
calculation of available memory from cgroup memory:
1. The memory information for cgroup memory is stored in the file
"memory.stat" rather than "memory.meminfo" (in fact, the
"memory.meminfo" file does not exist). You can see the files under the
cgroup path in the attached screenshots.
2. The output of "memory.stat" is shown in the screenshots below.

Screenshots:
https://github.com/apache/doris/assets/38196564/e654322e-9bf4-4f5e-951f-99e101ebbf47
https://github.com/apache/doris/assets/38196564/02cf8899-7618-4d5f-bf59-68fa0c90ebf2



My change involves two steps:
1. Modified the file name used to read cgroup memory info.
2. Modified how the cache value is extracted from the cgroup stats.

Co-authored-by: Xinyi Zou 
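
For context on the parsing changes below: `/proc/meminfo`-style keys end with
a colon and report values in kB (e.g. `Cached: 123456 kB`), while cgroup v1
`memory.stat` uses bare lowercase keys with values in bytes (e.g.
`cache 126418944`). That is why the lookup key changes from "Cached" to
"cache" and the trailing-colon trimming is dropped. A small illustrative
parser for both formats (a sketch, not the Doris code):

```cpp
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>

// Parse one line of either /proc/meminfo ("Cached:   123456 kB") or
// cgroup v1 memory.stat ("cache 126418944"), returning {key, bytes}.
std::pair<std::string, int64_t> parse_mem_line(const std::string& line) {
    std::istringstream in(line);
    std::string key, unit;
    int64_t value = 0;
    in >> key >> value >> unit;
    if (!key.empty() && key.back() == ':') key.pop_back(); // meminfo-style key
    if (unit == "kB") value *= 1024;                        // meminfo values are in kB
    return {key, value};
}

int main() {
    auto a = parse_mem_line("Cached:       123456 kB"); // {"Cached", 126418944}
    auto b = parse_mem_line("cache 126418944");         // {"cache",  126418944}
    std::cout << a.first << "=" << a.second << " " << b.first << "=" << b.second << "\n";
}
```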
---
 be/src/util/cgroup_util.cpp | 2 +-
 be/src/util/mem_info.cpp| 8 
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/be/src/util/cgroup_util.cpp b/be/src/util/cgroup_util.cpp
index a2c3e294e66..9ad78696a6f 100644
--- a/be/src/util/cgroup_util.cpp
+++ b/be/src/util/cgroup_util.cpp
@@ -184,7 +184,7 @@ Status CGroupUtil::find_cgroup_mem_info(std::string* 
file_path) {
 }
 string cgroup_path;
 RETURN_IF_ERROR(find_abs_cgroup_path("memory", &cgroup_path));
-*file_path = cgroup_path + "/memory.meminfo";
+*file_path = cgroup_path + "/memory.stat";
 return Status::OK();
 }
 
diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp
index fc5d5512f1a..45e609d7100 100644
--- a/be/src/util/mem_info.cpp
+++ b/be/src/util/mem_info.cpp
@@ -154,7 +154,7 @@ void MemInfo::refresh_proc_meminfo() {
 if (fields.size() < 2) {
 continue;
 }
-std::string key = fields[0].substr(0, fields[0].size() - 1);
+std::string key = fields[0].substr(0, fields[0].size());
 
 StringParser::ParseResult result;
 auto mem_value = 
StringParser::string_to_int(fields[1].data(),
@@ -180,19 +180,19 @@ void MemInfo::refresh_proc_meminfo() {
 // 
https://serverfault.com/questions/902009/the-memory-usage-reported-in-cgroup-differs-from-the-free-command
 // memory.usage_in_bytes ~= free.used + free.(buff/cache) - (buff)
 // so, memory.usage_in_bytes - memory.meminfo["Cached"]
-_s_cgroup_mem_usage = cgroup_mem_usage - 
_s_cgroup_mem_info_bytes["Cached"];
+_s_cgroup_mem_usage = cgroup_mem_usage - 
_s_cgroup_mem_info_bytes["cache"];
 // wait 10s, 100 * 100ms, avoid too frequently.
 _s_cgroup_mem_refresh_wait_times = -100;
 LOG(INFO) << "Refresh cgroup memory win, refresh again after 10s, 
cgroup mem limit: "
   << _s_cgroup_mem_limit << ", cgroup mem usage: " << 
_s_cgroup_mem_usage
-  << ", cgroup mem info cached: " << 
_s_cgroup_mem_info_bytes["Cached"];
+  << ", cgroup mem info cached: " << 
_s_cgroup_mem_info_bytes["cache"];
 } else {
 // find cgroup failed, wait 300s, 1000 * 100ms.
 _s_cgroup_mem_refresh_wait_times = -3000;
 LOG(INFO)
 << "Refresh cgroup memory failed, refresh again after 
300s, cgroup mem limit: "
 << _s_cgroup_mem_limit << ", cgroup mem usage: " << 
_s_cgroup_mem_usage
-<< ", cgroup mem info cached: " << 
_s_cgroup_mem_info_bytes["Cached"];
+<< ", cgroup mem info cached: " << 
_s_cgroup_mem_info_bytes["cache"];
 }
 } else {
 if (config::enable_use_cgroup_memory_info) {


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch master updated (b61a1a8ae7b -> aa438927ff8)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


from b61a1a8ae7b [fix](cgroup memory) Correct cgroup mem info cache (#36966)
 add aa438927ff8 [improvement](segmentcache) limit segment cache by memory 
or segment num (#37026)

No new revisions were added by this update.

Summary of changes:
 be/src/common/config.cpp   |  2 +-
 be/src/olap/lru_cache.cpp  |  5 +
 be/src/olap/lru_cache.h|  4 +++-
 be/src/olap/segment_loader.h   | 13 -
 be/src/runtime/exec_env_init.cpp   |  8 
 be/test/testutil/run_all_tests.cpp |  2 +-
 6 files changed, 22 insertions(+), 12 deletions(-)
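
The commit title describes capping the segment cache by either the number of
cached segments or their total memory. A generic sketch of that eviction
policy, a dual-limit LRU, is shown below; it illustrates the technique only
and is not the code in `be/src/olap/segment_loader.h` or `lru_cache.cpp`.

```cpp
#include <cstddef>
#include <iostream>
#include <list>
#include <string>
#include <unordered_map>

// LRU cache that evicts when EITHER the entry count or the total tracked
// bytes exceeds its cap.
class DualLimitLru {
public:
    DualLimitLru(size_t max_entries, size_t max_bytes)
            : _max_entries(max_entries), _max_bytes(max_bytes) {}

    void put(const std::string& key, size_t bytes) {
        auto it = _index.find(key);
        if (it != _index.end()) { // replace an existing entry
            _bytes -= it->second->bytes;
            _lru.erase(it->second);
            _index.erase(it);
        }
        _lru.push_front({key, bytes});
        _index[key] = _lru.begin();
        _bytes += bytes;
        while (_index.size() > _max_entries || _bytes > _max_bytes) {
            const Entry& victim = _lru.back(); // least recently used
            _bytes -= victim.bytes;
            _index.erase(victim.key);
            _lru.pop_back();
        }
    }

    size_t size() const { return _index.size(); }
    size_t bytes() const { return _bytes; }

private:
    struct Entry { std::string key; size_t bytes; };
    std::list<Entry> _lru; // front = most recently used
    std::unordered_map<std::string, std::list<Entry>::iterator> _index;
    size_t _max_entries;
    size_t _max_bytes;
    size_t _bytes = 0;
};

int main() {
    DualLimitLru cache(/*max_entries=*/3, /*max_bytes=*/100);
    cache.put("a", 40);
    cache.put("b", 40);
    cache.put("c", 40); // 120 bytes > 100, so "a" is evicted
    std::cout << cache.size() << " entries, " << cache.bytes() << " bytes\n"; // 2, 80
}
```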


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch branch-3.0 updated (fef6c7df5b8 -> 3df52cbf761)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a change to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


from fef6c7df5b8 [Fix](Variant) fix variant partial update with row store 
enabled (#36793)
 new f04c185cc6c [Refactor](meta) Delete useless classes and write methods 
(#36894)
 new 5b68de56cd1 [refactor](nereids) New distribute planner (#36531)
 new 018ff0ae94c [feature](insert)support external hive truncate table DDL 
(#36801)
 new a89a43c2a97 [Fix](hive-writer) Fixed the issue where 
`uncompletedMpuPendingUploads` did not remove objects correctly. (#36905)
 new 05f694a9405 [Fix](multi-catalog) Fix the transaction is not removed in 
abnormal situations by removing transaction in finally block. (#36705)
 new 15f162f6905 [Fix](variant) ignore serialization of nothing type 
(#36997)
 new 13e2a19bbb5 [chore](profile) rm useless code of profile (#36915)
 new fee6a24ef64 [chore](upgrade) turn off fallback_to_original_planner 
when upgrade (#37005)
 new c0591389cc7 [enhancement](cloud) batching get visible version from 
MetaService (#34615)
 new 039d4f722e3 [fix](regression test) fix unstable single compaction test 
p2 (#36881)
 new 1cbc322d0c5 [improvement](jdbc catalog) Modify the maximum number of 
connections in the connection pool to 30 by default (#36720)
 new 74eaf05cb85 [fix](statistics)Escape fetch partition stats sql. (#36941)
 new 05ca6c66f51 [fix](mtmv)fix mtmv dead lock (#37009)
 new b2f90a1ce4f [Fix](autoinc) try fix concurrent load problem with auto 
inc column (#36421)
 new 66ca7e94c83 [bugfix](testcase)add java error log output (#35998)
 new 82e4633053f Revert "[feature](cloud) support file cache only cache 
index pages (#36273) (#36797)
 new 9255c366ec1 (cloud-merge) Get fileCacheSize by RPC always (#36857)
 new 3666c75576f [performance](load) do not copy input_block in memtable 
(#36939)
 new 900976cf44a Revert "[enhancement](compaction) optimizing memory usage 
for compaction (#36492)" (#37032)
 new 848fda6afd3 [fix](cgroup memory) Correct cgroup mem info cache (#36966)
 new 3df52cbf761 [improvement](segmentcache) limit segment cache by memory 
or segment num (#37026)

The 21 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/cloud/cloud_base_compaction.cpp |  10 -
 be/src/cloud/cloud_cumulative_compaction.cpp   |  13 +-
 be/src/common/config.cpp   |  10 +-
 be/src/common/config.h |   8 -
 be/src/olap/base_compaction.cpp|  10 -
 be/src/olap/base_tablet.h  |   5 -
 be/src/olap/compaction.cpp |  15 +-
 be/src/olap/compaction.h   |   2 -
 be/src/olap/cumulative_compaction.cpp  |  15 +-
 be/src/olap/iterators.h|  15 +-
 be/src/olap/lru_cache.cpp  |   5 +
 be/src/olap/lru_cache.h|   4 +-
 be/src/olap/memtable.cpp   |  14 +-
 be/src/olap/merger.cpp |  67 +---
 be/src/olap/merger.h   |   6 +-
 be/src/olap/rowset/rowset_meta.h   |  15 -
 be/src/olap/rowset/segcompaction.cpp   |   2 +-
 be/src/olap/rowset/segment_v2/column_reader.cpp|   4 +-
 .../rowset/segment_v2/indexed_column_reader.cpp|   3 +-
 be/src/olap/segment_loader.h   |  13 +-
 be/src/olap/tablet_reader.h|   2 -
 be/src/runtime/exec_env_init.cpp   |   8 +-
 be/src/util/cgroup_util.cpp|   2 +-
 be/src/util/mem_info.cpp   |   8 +-
 be/src/vec/core/block.cpp  |  25 +-
 be/src/vec/core/block.h|   5 +-
 be/src/vec/data_types/data_type_object.cpp |  14 +-
 be/src/vec/olap/vertical_block_reader.cpp  |  18 +-
 be/src/vec/olap/vertical_block_reader.h|   3 +-
 be/src/vec/olap/vertical_merge_iterator.cpp|  29 +-
 be/src/vec/olap/vertical_merge_iterator.h  |  25 +-
 be/src/vec/sink/autoinc_buffer.cpp | 150 +
 be/src/vec/sink/autoinc_buffer.h   |  35 ++-
 be/test/olap/base_compaction_test.cpp  |  84 --
 be/test/olap/rowid_conversion_test.cpp |   6 +-
 be/test/testutil/run_all_tests.cpp |   2 +-
 be/test/vec/olap/vertical_compaction_test.cpp  |  14 +-
 cloud/src/meta-service/meta_service.cpp|   3 +
 cloud/src/meta-service/meta_service_txn.cpp|   6 +-
 .../apache/doris/jdbc/JdbcDataSourceConfig.java|   2 +-
 .../java/org/

(doris) 01/21: [Refactor](meta) Delete useless classes and write methods (#36894)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit f04c185cc6c780483977918e19e6a14a35b3dc16
Author: Peyz <30936555+iszhang...@users.noreply.github.com>
AuthorDate: Fri Jun 28 17:08:24 2024 +0800

[Refactor](meta) Delete useless classes and write methods (#36894)


Co-authored-by: zhangpeicheng 
---
 .../java/org/apache/doris/common/io/DeepCopy.java  |   1 +
 .../java/org/apache/doris/backup/BackupMeta.java   |   1 +
 .../apache/doris/catalog/CatalogRecycleBin.java|   2 +
 .../main/java/org/apache/doris/catalog/Column.java |  30 +-
 .../java/org/apache/doris/catalog/ColumnStats.java |  26 +
 .../org/apache/doris/catalog/DistributionInfo.java |   1 +
 .../apache/doris/catalog/HashDistributionInfo.java |   1 +
 .../apache/doris/catalog/ListPartitionInfo.java|   1 +
 .../apache/doris/catalog/MaterializedIndex.java|  11 +-
 .../java/org/apache/doris/catalog/OlapTable.java   |   3 +-
 .../java/org/apache/doris/catalog/Partition.java   |   2 +
 .../org/apache/doris/catalog/PartitionInfo.java|  20 +---
 .../doris/catalog/RandomDistributionInfo.java  |   1 +
 .../apache/doris/catalog/RangePartitionInfo.java   |   1 +
 .../java/org/apache/doris/catalog/Replica.java |  22 +---
 .../apache/doris/catalog/SinglePartitionInfo.java  |   1 +
 .../main/java/org/apache/doris/catalog/Tablet.java |  12 +--
 .../org/apache/doris/catalog/TempPartitions.java   |  24 +
 .../apache/doris/cloud/catalog/CloudPartition.java |   9 +-
 .../apache/doris/cloud/catalog/CloudReplica.java   |  17 +--
 .../apache/doris/persist/BackendIdsUpdateInfo.java |  63 
 .../java/org/apache/doris/persist/ClusterInfo.java | 114 -
 .../apache/doris/transaction/TabletCommitInfo.java |  11 +-
 .../org/apache/doris/catalog/ColumnStatTest.java   |  21 ++--
 .../java/org/apache/doris/catalog/ColumnTest.java  |  18 ++--
 .../doris/catalog/MaterializedIndexTest.java   |   6 +-
 .../doris/catalog/RangePartitionInfoTest.java  |   6 +-
 .../java/org/apache/doris/catalog/ReplicaTest.java |  12 +--
 .../java/org/apache/doris/catalog/TabletTest.java  |   6 +-
 .../apache/doris/catalog/TempPartitionTest.java|   4 +-
 30 files changed, 74 insertions(+), 373 deletions(-)

diff --git 
a/fe/fe-common/src/main/java/org/apache/doris/common/io/DeepCopy.java 
b/fe/fe-common/src/main/java/org/apache/doris/common/io/DeepCopy.java
index 106205f2256..282e6a0e16a 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/io/DeepCopy.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/io/DeepCopy.java
@@ -39,6 +39,7 @@ public class DeepCopy {
 // deep copy orig to dest.
 // the param "c" is the implementation class of "dest".
 // And the "dest" class must has method "readFields(DataInput)"
+@Deprecated
 public static boolean copy(Writable orig, Writable dest, Class c, int 
metaVersion) {
 MetaContext metaContext = new MetaContext();
 metaContext.setMetaVersion(metaVersion);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java 
b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java
index 45fdd1261ce..e27c8d19a84 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupMeta.java
@@ -140,6 +140,7 @@ public class BackupMeta implements Writable, 
GsonPostProcessable {
 Text.writeString(out, GsonUtils.GSON.toJson(this));
 }
 
+@Deprecated
 public void readFields(DataInput in) throws IOException {
 int size = in.readInt();
 for (int i = 0; i < size; i++) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java
index 6b6fe5284a4..17cc5cd148b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java
@@ -1305,6 +1305,7 @@ public class CatalogRecycleBin extends MasterDaemon 
implements Writable, GsonPos
 updateDbInfoForLowerVersion();
 }
 
+@Deprecated
 public void readFields(DataInput in) throws IOException {
 int count = in.readInt();
 for (int i = 0; i < count; i++) {
@@ -1439,6 +1440,7 @@ public class CatalogRecycleBin extends MasterDaemon 
implements Writable, GsonPos
 return table;
 }
 
+@Deprecated
 public void readFields(DataInput in) throws IOException {
 dbId = in.readLong();
 table = Table.read(in);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
index 8d453b1b0f0..0aca7e4e28e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
+++ b/fe

(doris) 03/21: [feature](insert)support external hive truncate table DDL (#36801)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 018ff0ae94cb16df3f120b08e135e078f25ab741
Author: slothever <18522955+w...@users.noreply.github.com>
AuthorDate: Fri Jun 28 17:27:44 2024 +0800

[feature](insert)support external hive truncate table DDL (#36801)

## Proposed changes

Issue Number: #31442

1. Support 'truncate table' on Hive 3.
2. Forbid 'truncate table' on Hive 2, because it is not supported by the
HMS client API on Hive 2.
---
 .../apache/doris/analysis/TruncateTableStmt.java   |   3 -
 .../main/java/org/apache/doris/catalog/Env.java|   6 +-
 .../org/apache/doris/datasource/CatalogIf.java |   3 +
 .../apache/doris/datasource/ExternalCatalog.java   |  23 +
 .../doris/datasource/hive/HMSCachedClient.java |   2 +
 .../doris/datasource/hive/HiveMetadataOps.java |  16 
 .../hive/PostgreSQLJdbcHMSCachedClient.java|   4 +
 .../datasource/hive/ThriftHMSCachedClient.java |  19 +++-
 .../datasource/iceberg/IcebergMetadataOps.java |   5 ++
 .../datasource/operations/ExternalMetadataOps.java |   8 ++
 .../doris/datasource/TestHMSCachedClient.java  |   3 +
 .../hive/ddl/test_hive_truncate_table.out  |  24 +
 .../hive/ddl/test_hive_truncate_table.groovy   | 100 +
 13 files changed, 210 insertions(+), 6 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/TruncateTableStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/TruncateTableStmt.java
index a275879692c..1a9fbd3bafd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TruncateTableStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TruncateTableStmt.java
@@ -23,7 +23,6 @@ import org.apache.doris.common.ErrorCode;
 import org.apache.doris.common.ErrorReport;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.InternalDatabaseUtil;
-import org.apache.doris.common.util.Util;
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.qe.ConnectContext;
 
@@ -44,8 +43,6 @@ public class TruncateTableStmt extends DdlStmt {
 public void analyze(Analyzer analyzer) throws AnalysisException, 
UserException {
 super.analyze(analyzer);
 tblRef.getName().analyze(analyzer);
-// disallow external catalog
-Util.prohibitExternalCatalog(tblRef.getName().getCtl(), 
this.getClass().getSimpleName());
 
 if (tblRef.hasExplicitAlias()) {
 throw new AnalysisException("Not support truncate table with 
alias");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
index 207a16a2678..a3da3482f77 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
@@ -5577,8 +5577,10 @@ public class Env {
  * otherwise, it will only truncate those specified partitions.
  *
  */
-public void truncateTable(TruncateTableStmt truncateTableStmt) throws 
DdlException {
-getInternalCatalog().truncateTable(truncateTableStmt);
+public void truncateTable(TruncateTableStmt stmt) throws DdlException {
+CatalogIf catalogIf = 
catalogMgr.getCatalogOrException(stmt.getTblRef().getName().getCtl(),
+catalog -> new DdlException(("Unknown catalog " + catalog)));
+catalogIf.truncateTable(stmt);
 }
 
 public void replayTruncateTable(TruncateTableInfo info) throws 
MetaNotFoundException {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
index ceee1a68157..a79897c67df 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/CatalogIf.java
@@ -22,6 +22,7 @@ import org.apache.doris.analysis.CreateTableStmt;
 import org.apache.doris.analysis.DropDbStmt;
 import org.apache.doris.analysis.DropTableStmt;
 import org.apache.doris.analysis.TableName;
+import org.apache.doris.analysis.TruncateTableStmt;
 import org.apache.doris.catalog.DatabaseIf;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.TableIf;
@@ -194,4 +195,6 @@ public interface CatalogIf {
 boolean createTable(CreateTableStmt stmt) throws UserException;
 
 void dropTable(DropTableStmt stmt) throws DdlException;
+
+void truncateTable(TruncateTableStmt truncateTableStmt) throws 
DdlException;
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
index 929378ae80f..b643176aaf1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apach

(doris) 13/21: [fix](mtmv)fix mtmv dead lock (#37009)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 05ca6c66f51d265d321ffbed6bae37d9183f1b89
Author: zhangdong <493738...@qq.com>
AuthorDate: Fri Jun 28 21:09:35 2024 +0800

[fix](mtmv)fix mtmv dead lock (#37009)

Cause:
- dropBaseTable: holds the db's writeLock, then notifies the MTMV to change its
alter status to schema_change, which needs the mv's writeLock.
- task (insert overwrite): holds the mv's readLock, and needs the db's readLock
when generating the plan.

Fix:
- Changing the MTMV alter status to schema_change now takes the mv's writeMvLock
instead of the mv's writeLock (see the sketch below).
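
The underlying problem is a lock-ordering cycle: one thread takes the db lock and then needs the mv's table lock, while the refresh task holds the mv's table lock (shared) and then needs the db lock. A minimal sketch of the idea behind the fix, assuming hypothetical lock fields rather than the real MTMV class: guard the alter-status update with a dedicated MV-state lock so it no longer contends with the table lock held by the task.

```
// Hedged sketch; field and method names are hypothetical.
import java.util.concurrent.locks.ReentrantReadWriteLock;

class MvLockSketch {
    // Table-level lock: a refresh task holds the read lock for its whole run.
    final ReentrantReadWriteLock tableLock = new ReentrantReadWriteLock();
    // Dedicated lock guarding only the MV status metadata.
    final ReentrantReadWriteLock mvStateLock = new ReentrantReadWriteLock();

    // Called while the db's writeLock is already held. Before the fix this took
    // tableLock.writeLock() and could wait forever on a task that holds
    // tableLock.readLock() and is itself waiting for the db lock.
    void alterStatusToSchemaChange() {
        mvStateLock.writeLock().lock();
        try {
            // ... update the MV status to SCHEMA_CHANGE ...
        } finally {
            mvStateLock.writeLock().unlock();
        }
    }
}
```
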
---
 fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java 
b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java
index a029f604dfd..26045f8527a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java
@@ -916,7 +916,7 @@ public class Alter {
 Database db = 
Env.getCurrentInternalCatalog().getDbOrDdlException(tbl.getDb());
 mtmv = (MTMV) db.getTableOrMetaException(tbl.getTbl(), 
TableType.MATERIALIZED_VIEW);
 
-mtmv.writeLock();
+mtmv.writeMvLock();
 switch (alterMTMV.getOpType()) {
 case ALTER_REFRESH_INFO:
 mtmv.alterRefreshInfo(alterMTMV.getRefreshInfo());
@@ -945,7 +945,7 @@ public class Alter {
 LOG.warn(e);
 } finally {
 if (mtmv != null) {
-mtmv.writeUnlock();
+mtmv.writeMvUnlock();
 }
 }
 }





(doris) 16/21: Revert "[feature](cloud) support file cache only cache index pages (#36273) (#36797)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 82e4633053f6e7d1789069685b6031c93d28927c
Author: zhengyu 
AuthorDate: Sat Jun 29 02:13:28 2024 +0800

Revert "[feature](cloud) support file cache only cache index pages (#36273) 
(#36797)

This reverts commit f5c40a5c3bac3091707c8324c422889fa7f030c8.
This was an experimental (and buggy) commit. I have found that caching only
index pages helps little, so it is a good time to revert it.
---
 be/src/common/config.cpp| 2 --
 be/src/common/config.h  | 2 --
 be/src/olap/rowset/segment_v2/column_reader.cpp | 4 +---
 be/src/olap/rowset/segment_v2/indexed_column_reader.cpp | 3 +--
 4 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 91707b5a8a2..3e948f4cca2 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1003,8 +1003,6 @@ DEFINE_Bool(enable_file_cache, "false");
 // format: 
[{"path":"/path/to/file_cache","total_size":21474836480,"query_limit":10737418240},{"path":"/path/to/file_cache2","total_size":21474836480,"query_limit":10737418240}]
 DEFINE_String(file_cache_path, "");
 DEFINE_Int64(file_cache_each_block_size, "1048576"); // 1MB
-// only cache index pages (prerequisite: enable_file_cache = true)
-DEFINE_Bool(file_cache_index_only, "false");
 
 DEFINE_Bool(clear_file_cache, "false");
 DEFINE_Bool(enable_file_cache_query_limit, "false");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index f35aeb61747..6f0065e2fe3 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1047,8 +1047,6 @@ DECLARE_Bool(enable_file_cache);
 // format: 
[{"path":"/path/to/file_cache","total_size":21474836480,"query_limit":10737418240,"normal_percent":85,
 "disposable_percent":10, "index_percent":5}]
 DECLARE_String(file_cache_path);
 DECLARE_Int64(file_cache_each_block_size);
-// only cache index pages (prerequisite: enable_file_cache = true)
-DECLARE_Bool(file_cache_index_only);
 DECLARE_Bool(clear_file_cache);
 DECLARE_Bool(enable_file_cache_query_limit);
 DECLARE_Int32(file_cache_enter_disk_resource_limit_mode_percent);
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 7396bce1a55..392917e0d83 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -342,11 +342,9 @@ Status ColumnReader::read_page(const 
ColumnIteratorOptions& iter_opts, const Pag
PageHandle* handle, Slice* page_body, 
PageFooterPB* footer,
BlockCompressionCodec* codec) const {
 iter_opts.sanity_check();
-bool use_page_cache = iter_opts.use_page_cache &&
-  (!config::file_cache_index_only || iter_opts.type == 
INDEX_PAGE);
 PageReadOptions opts {
 .verify_checksum = _opts.verify_checksum,
-.use_page_cache = use_page_cache,
+.use_page_cache = iter_opts.use_page_cache,
 .kept_in_memory = _opts.kept_in_memory,
 .type = iter_opts.type,
 .file_reader = iter_opts.file_reader,
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp 
b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
index 94429920877..59251b5595d 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
@@ -115,9 +115,8 @@ Status IndexedColumnReader::read_page(const PagePointer& 
pp, PageHandle* handle,
   PageFooterPB* footer, PageTypePB type,
   BlockCompressionCodec* codec, bool 
pre_decode) const {
 OlapReaderStatistics tmp_stats;
-bool use_page_cache = _use_page_cache && (!config::file_cache_index_only 
|| type == INDEX_PAGE);
 PageReadOptions opts {
-.use_page_cache = use_page_cache,
+.use_page_cache = _use_page_cache,
 .kept_in_memory = _kept_in_memory,
 .pre_decode = pre_decode,
 .type = type,





(doris) 04/21: [Fix](hive-writer) Fixed the issue where `uncompletedMpuPendingUploads` did not remove objects correctly. (#36905)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit a89a43c2a97910b5f907490feb72faad94b34553
Author: Qi Chen 
AuthorDate: Fri Jun 28 17:41:02 2024 +0800

[Fix](hive-writer) Fixed the issue where `uncompletedMpuPendingUploads` did 
not remove objects correctly. (#36905)

[Fix](hive-writer) Fixed the issue where `uncompletedMpuPendingUploads`
did not remove objects correctly.
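
The root cause visible in the diff below is that `UncompletedMpuPendingUpload` relied on identity equality, so `HashSet.remove()` called with a newly constructed instance never matched the stored one. A small self-contained illustration of that behavior, using a hypothetical `UploadKey` class:

```
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;

class UploadKey {
    final String uploadId;
    final String path;

    UploadKey(String uploadId, String path) {
        this.uploadId = uploadId;
        this.path = path;
    }

    // Without these overrides, two keys with identical fields are "different"
    // to a HashSet, and remove(new UploadKey(...)) silently removes nothing.
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof UploadKey)) {
            return false;
        }
        UploadKey that = (UploadKey) o;
        return Objects.equals(uploadId, that.uploadId) && Objects.equals(path, that.path);
    }

    @Override
    public int hashCode() {
        return Objects.hash(uploadId, path);
    }

    public static void main(String[] args) {
        Set<UploadKey> pending = new HashSet<>();
        pending.add(new UploadKey("mpu-1", "s3://bucket/part-0"));
        // Prints true only because equals/hashCode are value-based.
        System.out.println(pending.remove(new UploadKey("mpu-1", "s3://bucket/part-0")));
    }
}
```
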
---
 .../apache/doris/datasource/hive/HMSTransaction.java   | 18 ++
 1 file changed, 18 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
index 824af6996a9..d883b9dc786 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
@@ -103,6 +103,24 @@ public class HMSTransaction implements Transaction {
 this.s3MPUPendingUpload = s3MPUPendingUpload;
 this.path = path;
 }
+
+@Override
+public boolean equals(Object o) {
+if (this == o) {
+return true;
+}
+if (o == null || getClass() != o.getClass()) {
+return false;
+}
+UncompletedMpuPendingUpload that = (UncompletedMpuPendingUpload) o;
+return Objects.equals(s3MPUPendingUpload, that.s3MPUPendingUpload) 
&& Objects.equals(path,
+that.path);
+}
+
+@Override
+public int hashCode() {
+return Objects.hash(s3MPUPendingUpload, path);
+}
 }
 
 private Set uncompletedMpuPendingUploads = 
new HashSet<>();





(doris) 05/21: [Fix](multi-catalog) Fix the transaction is not removed in abnormal situations by removing transaction in finally block. (#36705)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 05f694a94051c69be719a49327bd1c1064e04ef6
Author: Qi Chen 
AuthorDate: Fri Jun 28 17:41:20 2024 +0800

[Fix](multi-catalog) Fix the transaction is not removed in abnormal 
situations by removing transaction in finally block. (#36705)

[Fix] (multi-catalog) Fix the issue where the transaction is not removed in
abnormal situations by removing the transaction in a `finally` block.
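
The pattern in the diff below is the standard one for cleanup that must run even when the wrapped call throws: move the removal into `finally`. A tiny sketch of the same pattern with a hypothetical transaction registry:

```
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

class TxnRegistrySketch {
    private final Map<Long, AutoCloseable> transactions = new ConcurrentHashMap<>();

    void rollback(long id) throws Exception {
        try {
            transactions.get(id).close(); // the rollback itself may throw
        } finally {
            transactions.remove(id); // runs either way, so the entry never leaks
        }
    }
}
```
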
---
 .../java/org/apache/doris/transaction/HiveTransactionManager.java  | 7 +--
 .../org/apache/doris/transaction/IcebergTransactionManager.java| 7 +--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/HiveTransactionManager.java
 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/HiveTransactionManager.java
index 838d135fa45..c48210ad452 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/HiveTransactionManager.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/HiveTransactionManager.java
@@ -59,8 +59,11 @@ public class HiveTransactionManager implements 
TransactionManager {
 
 @Override
 public void rollback(long id) {
-getTransactionWithException(id).rollback();
-transactions.remove(id);
+try {
+getTransactionWithException(id).rollback();
+} finally {
+transactions.remove(id);
+}
 }
 
 @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/IcebergTransactionManager.java
 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/IcebergTransactionManager.java
index f4b802aaa99..f373c133685 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/IcebergTransactionManager.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/IcebergTransactionManager.java
@@ -51,8 +51,11 @@ public class IcebergTransactionManager implements 
TransactionManager {
 
 @Override
 public void rollback(long id) {
-getTransactionWithException(id).rollback();
-transactions.remove(id);
+try {
+getTransactionWithException(id).rollback();
+} finally {
+transactions.remove(id);
+}
 }
 
 @Override





(doris) 19/21: Revert "[enhancement](compaction) optimizing memory usage for compaction (#36492)" (#37032)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 900976cf44a88ef4447cba19b776aceba1c5010e
Author: Yongqiang YANG <98214048+dataroar...@users.noreply.github.com>
AuthorDate: Sun Jun 30 11:08:22 2024 +0800

Revert "[enhancement](compaction) optimizing memory usage for compaction 
(#36492)" (#37032)

This reverts commit 99901814d8b90887f54b1768b98b4f0b78fab376.
---
 be/src/cloud/cloud_base_compaction.cpp |  10 --
 be/src/cloud/cloud_cumulative_compaction.cpp   |  13 +-
 be/src/common/config.cpp   |   6 -
 be/src/common/config.h |   6 -
 be/src/olap/base_compaction.cpp|  10 --
 be/src/olap/base_tablet.h  |   5 -
 be/src/olap/compaction.cpp |  15 +--
 be/src/olap/compaction.h   |   2 -
 be/src/olap/cumulative_compaction.cpp  |  15 +--
 be/src/olap/iterators.h|  15 +--
 be/src/olap/merger.cpp |  67 +-
 be/src/olap/merger.h   |   6 +-
 be/src/olap/rowset/rowset_meta.h   |  15 ---
 be/src/olap/rowset/segcompaction.cpp   |   2 +-
 be/src/olap/tablet_reader.h|   2 -
 be/src/vec/olap/vertical_block_reader.cpp  |  18 +--
 be/src/vec/olap/vertical_block_reader.h|   3 +-
 be/src/vec/olap/vertical_merge_iterator.cpp|  29 ++---
 be/src/vec/olap/vertical_merge_iterator.h  |  25 +---
 be/test/olap/base_compaction_test.cpp  |  84 -
 be/test/olap/rowid_conversion_test.cpp |   6 +-
 be/test/vec/olap/vertical_compaction_test.cpp  |  14 +--
 .../compaction_width_array_column.groovy   | 137 -
 23 files changed, 42 insertions(+), 463 deletions(-)

diff --git a/be/src/cloud/cloud_base_compaction.cpp 
b/be/src/cloud/cloud_base_compaction.cpp
index 4ceab8eb6e3..d4a86743a48 100644
--- a/be/src/cloud/cloud_base_compaction.cpp
+++ b/be/src/cloud/cloud_base_compaction.cpp
@@ -163,16 +163,6 @@ Status CloudBaseCompaction::pick_rowsets_to_compact() {
 return Status::Error("no suitable versions for 
compaction");
 }
 
-int score = 0;
-int rowset_cnt = 0;
-while (rowset_cnt < _input_rowsets.size()) {
-score += 
_input_rowsets[rowset_cnt++]->rowset_meta()->get_compaction_score();
-if (score > config::base_compaction_max_compaction_score) {
-break;
-}
-}
-_input_rowsets.resize(rowset_cnt);
-
 // 1. cumulative rowset must reach base_compaction_min_rowset_num threshold
 if (_input_rowsets.size() > config::base_compaction_min_rowset_num) {
 VLOG_NOTICE << "satisfy the base compaction policy. tablet=" << 
_tablet->tablet_id()
diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp 
b/be/src/cloud/cloud_cumulative_compaction.cpp
index 2a26b1b294b..de318f979a5 100644
--- a/be/src/cloud/cloud_cumulative_compaction.cpp
+++ b/be/src/cloud/cloud_cumulative_compaction.cpp
@@ -354,20 +354,11 @@ Status 
CloudCumulativeCompaction::pick_rowsets_to_compact() {
 return st;
 }
 
-int64_t max_score = config::cumulative_compaction_max_deltas;
-auto process_memory_usage = 
doris::GlobalMemoryArbitrator::process_memory_usage();
-bool memory_usage_high = process_memory_usage > MemInfo::soft_mem_limit() 
* 0.8;
-if 
(cloud_tablet()->last_compaction_status.is() ||
-memory_usage_high) {
-max_score = std::max(config::cumulative_compaction_max_deltas /
- 
config::cumulative_compaction_max_deltas_factor,
- config::cumulative_compaction_min_deltas + 1);
-}
-
 size_t compaction_score = 0;
 auto compaction_policy = 
cloud_tablet()->tablet_meta()->compaction_policy();
 _engine.cumu_compaction_policy(compaction_policy)
-->pick_input_rowsets(cloud_tablet(), candidate_rowsets, max_score,
+->pick_input_rowsets(cloud_tablet(), candidate_rowsets,
+ config::cumulative_compaction_max_deltas,
  config::cumulative_compaction_min_deltas, 
&_input_rowsets,
  &_last_delete_version, &compaction_score);
 
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 3e948f4cca2..580793d36ab 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -385,7 +385,6 @@ DEFINE_mInt32(max_single_replica_compaction_threads, "-1");
 
 DEFINE_Bool(enable_base_compaction_idle_sched, "true");
 DEFINE_mInt64(base_compaction_min_rowset_num, "5");
-DEFINE_mInt64(base_compaction_max_compaction_score, "20");
 DEFINE_mDouble(base_compaction_min_data_ratio, "0.3");
 DEFINE_mInt64(base_compaction_dup_key_ma

(doris) 10/21: [fix](regression test) fix unstable single compaction test p2 (#36881)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 039d4f722e3a77c2970e3811e84ba3e9ea0f28f3
Author: Sun Chenyang 
AuthorDate: Fri Jun 28 19:22:05 2024 +0800

[fix](regression test) fix unstable single compaction test p2 (#36881)
---
 .../test_single_compaction_fault_injection.groovy  | 68 
 .../test_single_replica_compaction.groovy  | 72 --
 2 files changed, 83 insertions(+), 57 deletions(-)

diff --git 
a/regression-test/suites/compaction/test_single_compaction_fault_injection.groovy
 
b/regression-test/suites/compaction/test_single_compaction_fault_injection.groovy
index 7acafae51ab..ebc74257032 100644
--- 
a/regression-test/suites/compaction/test_single_compaction_fault_injection.groovy
+++ 
b/regression-test/suites/compaction/test_single_compaction_fault_injection.groovy
@@ -32,22 +32,20 @@ suite("test_single_compaction_fault_injection", "p2") {
 }
 
 def triggerCompaction = { be_host, be_http_port, compact_type, tablet_id ->
-StringBuilder sb = new StringBuilder();
-sb.append("curl -X POST http://${be_host}:${be_http_port}";)
-sb.append("/api/compaction/run?tablet_id=")
-sb.append(tablet_id)
-sb.append("&compact_type=${compact_type}")
-
-String command = sb.toString()
-logger.info(command)
-process = command.execute()
-code = process.waitFor()
-err = IOGroovyMethods.getText(new BufferedReader(new 
InputStreamReader(process.getErrorStream(;
-out = process.getText()
-logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" 
+ err)
-assertEquals(code, 0)
-return out
-} 
+if (compact_type == "cumulative") {
+def (code_1, out_1, err_1) = be_run_cumulative_compaction(be_host, 
be_http_port, tablet_id)
+logger.info("Run compaction: code=" + code_1 + ", out=" + out_1 + 
", err=" + err_1)
+assertEquals(code_1, 0)
+return out_1
+} else if (compact_type == "full") {
+def (code_2, out_2, err_2) = be_run_full_compaction(be_host, 
be_http_port, tablet_id)
+logger.info("Run compaction: code=" + code_2 + ", out=" + out_2 + 
", err=" + err_2)
+assertEquals(code_2, 0)
+return out_2
+} else {
+assertFalse(True)
+}
+}
 
 def triggerSingleCompaction = { be_host, be_http_port, tablet_id ->
 StringBuilder sb = new StringBuilder();
@@ -56,15 +54,33 @@ suite("test_single_compaction_fault_injection", "p2") {
 sb.append(tablet_id)
 sb.append("&compact_type=cumulative&remote=true")
 
-String command = sb.toString()
-logger.info(command)
-process = command.execute()
-code = process.waitFor()
-err = IOGroovyMethods.getText(new BufferedReader(new 
InputStreamReader(process.getErrorStream(;
-out = process.getText()
-logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" 
+ err)
-assertEquals(code, 0)
-return out
+Integer maxRetries = 10; // Maximum number of retries
+Integer retryCount = 0; // Current retry count
+Integer sleepTime = 5000; // Sleep time in milliseconds
+String cmd = sb.toString()
+def process
+int code_3
+String err_3
+String out_3
+
+while (retryCount < maxRetries) {
+process = cmd.execute()
+code_3 = process.waitFor()
+err_3 = IOGroovyMethods.getText(new BufferedReader(new 
InputStreamReader(process.getErrorStream(
+out_3 = process.getText()
+
+// If the command was successful, break the loop
+if (code_3 == 0) {
+break
+}
+
+// If the command was not successful, increment the retry count, 
sleep for a while and try again
+retryCount++
+sleep(sleepTime)
+}
+assertEquals(code_3, 0)
+logger.info("Get compaction status: code=" + code_3 + ", out=" + out_3)
+return out_3
 }
 def waitForCompaction = { be_host, be_http_port, tablet_id ->
 boolean running = true
@@ -337,7 +353,7 @@ suite("test_single_compaction_fault_injection", "p2") {
 
 // trigger master be to do base compaction
 
assertTrue(triggerCompaction(backendId_to_backendIP[master_backend_id], 
backendId_to_backendHttpPort[master_backend_id],
-"base", tablet_id).contains("Success")); 
+"full", tablet_id).contains("Success")); 
 waitForCompaction(backendId_to_backendIP[master_backend_id], 
backendId_to_backendHttpPort[master_backend_id], tablet_id)
 
 // trigger follower be to fetch compaction result
diff --git 
a/regression-test/suites/compaction/test_sin

(doris) 18/21: [performance](load) do not copy input_block in memtable (#36939)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 3666c75576f75ebfc86117beb2ccc24359dff743
Author: Kaijie Chen 
AuthorDate: Sat Jun 29 16:06:35 2024 +0800

[performance](load) do not copy input_block in memtable (#36939)

Pass `column_offset` to `Block::clone_without_columns()` and
`MutableBlock::add_rows()`,
so we do not need to copy `input_block` in `MemTable::insert()`.

To optimize performance in cases with many (e.g. 12k+) tablets:

Before:

```
-  DeltaWriterWriteTime:  1m58s
   -  MemTableWriterFlushTime:  0ns
   -  MemTableWriterLockTime:  1s41ms
   -  MemTableWriterShrinkTime:  0ns
   -  MemTableWriterWriteTime:  1m53s
  -  MemTableCopyCount:  12.044003M  (12044003)
  -  MemTableCopyTime:  42s857ms
  -  MemTableCopyTime0:  11s866ms
  -  MemTableInitTime:  13.384ms
  -  MemTableInsertTime:  51s543ms
```

After:

```
-  DeltaWriterWriteTime:  1m
   -  MemTableWriterFlushTime:  0ns
   -  MemTableWriterLockTime:  971.481ms
   -  MemTableWriterShrinkTime:  0ns
   -  MemTableWriterWriteTime:  55s274ms
  -  MemTableCopyCount:  0
  -  MemTableCopyTime:  0ns
  -  MemTableCopyTime0:  0ns
  -  MemTableInitTime:  18.746ms
  -  MemTableInsertTime:  53s772ms
```
---
 be/src/olap/memtable.cpp  | 14 ++
 be/src/vec/core/block.cpp | 25 ++---
 be/src/vec/core/block.h   |  5 +++--
 3 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/be/src/olap/memtable.cpp b/be/src/olap/memtable.cpp
index 750026c289d..e55fd678bd2 100644
--- a/be/src/olap/memtable.cpp
+++ b/be/src/olap/memtable.cpp
@@ -180,16 +180,14 @@ int RowInBlockComparator::operator()(const RowInBlock* 
left, const RowInBlock* r
 
 Status MemTable::insert(const vectorized::Block* input_block,
 const std::vector& row_idxs) {
-vectorized::Block target_block = *input_block;
-target_block = input_block->copy_block(_column_offset);
 if (_is_first_insertion) {
 _is_first_insertion = false;
-auto cloneBlock = target_block.clone_without_columns();
+auto cloneBlock = input_block->clone_without_columns(&_column_offset);
 _input_mutable_block = 
vectorized::MutableBlock::build_mutable_block(&cloneBlock);
 _vec_row_comparator->set_block(&_input_mutable_block);
 _output_mutable_block = 
vectorized::MutableBlock::build_mutable_block(&cloneBlock);
 if (_keys_type != KeysType::DUP_KEYS) {
-_init_agg_functions(&target_block);
+_init_agg_functions(input_block);
 }
 if (_tablet_schema->has_sequence_col()) {
 if (_is_partial_update) {
@@ -210,11 +208,11 @@ Status MemTable::insert(const vectorized::Block* 
input_block,
 auto num_rows = row_idxs.size();
 size_t cursor_in_mutableblock = _input_mutable_block.rows();
 auto block_size0 = _input_mutable_block.allocated_bytes();
-RETURN_IF_ERROR(_input_mutable_block.add_rows(&target_block, 
row_idxs.data(),
-  row_idxs.data() + num_rows));
+RETURN_IF_ERROR(_input_mutable_block.add_rows(input_block, row_idxs.data(),
+  row_idxs.data() + num_rows, 
&_column_offset));
 auto block_size1 = _input_mutable_block.allocated_bytes();
 g_memtable_input_block_allocated_size << block_size1 - block_size0;
-auto input_size = size_t(target_block.bytes() * num_rows / 
target_block.rows() *
+auto input_size = size_t(input_block->bytes() * num_rows / 
input_block->rows() *
  config::memtable_insert_memory_ratio);
 _mem_usage += input_size;
 _insert_mem_tracker->consume(input_size);
@@ -348,7 +346,7 @@ Status MemTable::_sort_by_cluster_keys() {
 row_pos_vec.emplace_back(row_in_blocks[i]->_row_pos);
 }
 return _output_mutable_block.add_rows(&in_block, row_pos_vec.data(),
-  row_pos_vec.data() + 
in_block.rows());
+  row_pos_vec.data() + 
in_block.rows(), &_column_offset);
 }
 
 void MemTable::_sort_one_column(std::vector& row_in_blocks, Tie& 
tie,
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 676674e8ec0..22062dc5310 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -653,12 +653,19 @@ Block Block::clone_with_columns(const Columns& columns) 
const {
 return res;
 }
 
-Block Block::clone_without_columns() const {
+Block Block::clone_without_columns(const std::vector* column_offset) 
const {
 Block res;
 
-size_t num_columns = data.size();
-for (size_t i = 0; i < num_columns; ++i) {
-res.insert({nullptr, data[i].type, data[i].n

(doris) 14/21: [Fix](autoinc) try fix concurrent load problem with auto inc column (#36421)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit b2f90a1ce4f527490f603ff652a94e6a8a4c674d
Author: bobhan1 
AuthorDate: Fri Jun 28 21:12:21 2024 +0800

[Fix](autoinc) try fix concurrent load problem with auto inc column (#36421)

1. Increase the value of `AutoIncrementGenerator.BATCH_ID_INTERVAL` to
reduce the number of writes to BDBJE (the default value of
`config::auto_inc_prefetch_size_ratio` is 10 and the default value of
`AutoIncIDBuffer::_batch_size` is 4064, so the request length is mostly
40960).
2. Only allow the master FE to offer the `getAutoIncrementRange` service.
3. Write the edit log before updating `batchEndId` in memory in
`getAutoIncrementRange`.
4. Refactor `AutoIncIDBuffer` (a sketch of the range-buffer idea follows).
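
The buffer's job is to hand out IDs locally and only call the FE for a new range when the cached one is exhausted, so one RPC covers thousands of rows. A minimal single-threaded sketch of that range-allocation idea in Java, with hypothetical names (the real `AutoIncIDBuffer` is C++ and prefetches asynchronously below a low-water mark):

```
// Hedged sketch of range-based auto-increment allocation; names are hypothetical.
class AutoIncRangeBuffer {
    interface RangeFetcher {
        long[] fetch(long length); // returns {start, length} granted by the master FE
    }

    private final RangeFetcher fetcher;
    private final long batchSize;
    private long next;      // next id to hand out
    private long remaining; // ids left in the cached range

    AutoIncRangeBuffer(RangeFetcher fetcher, long batchSize) {
        this.fetcher = fetcher;
        this.batchSize = batchSize;
    }

    // One FE round trip covers batchSize ids instead of one request per row.
    long nextId() {
        if (remaining == 0) {
            long[] range = fetcher.fetch(batchSize);
            next = range[0];
            remaining = range[1];
        }
        remaining--;
        return next++;
    }
}
```
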
---
 be/src/vec/sink/autoinc_buffer.cpp | 150 +
 be/src/vec/sink/autoinc_buffer.h   |  35 +++--
 .../doris/catalog/AutoIncrementGenerator.java  |   5 +-
 .../apache/doris/service/FrontendServiceImpl.java  |  10 ++
 gensrc/thrift/FrontendService.thrift   |   1 +
 .../unique/test_unique_auto_inc_concurrent.out |  10 ++
 .../unique/test_unique_auto_inc_concurrent.groovy  |  59 
 7 files changed, 203 insertions(+), 67 deletions(-)

diff --git a/be/src/vec/sink/autoinc_buffer.cpp 
b/be/src/vec/sink/autoinc_buffer.cpp
index c7c096ec6e8..f83dbcb55b8 100644
--- a/be/src/vec/sink/autoinc_buffer.cpp
+++ b/be/src/vec/sink/autoinc_buffer.cpp
@@ -19,14 +19,15 @@
 
 #include 
 
-#include 
+#include 
+#include 
 
+#include "common/logging.h"
 #include "common/status.h"
 #include "runtime/client_cache.h"
 #include "runtime/exec_env.h"
 #include "util/runtime_profile.h"
 #include "util/thrift_rpc_helper.h"
-#include "vec/sink/vtablet_block_convertor.h"
 
 namespace doris::vectorized {
 
@@ -42,54 +43,11 @@ void AutoIncIDBuffer::set_batch_size_at_least(size_t 
batch_size) {
 }
 }
 
-void AutoIncIDBuffer::_wait_for_prefetching() {
-if (_is_fetching) {
-_rpc_token->wait();
-}
-}
-
-Status AutoIncIDBuffer::sync_request_ids(size_t length,
- std::vector>* result) {
-std::unique_lock lock(_mutex);
-RETURN_IF_ERROR(_prefetch_ids(_prefetch_size()));
-if (_front_buffer.second > 0) {
-auto min_length = std::min(_front_buffer.second, length);
-length -= min_length;
-result->emplace_back(_front_buffer.first, min_length);
-_front_buffer.first += min_length;
-_front_buffer.second -= min_length;
-}
-if (length > 0) {
-_wait_for_prefetching();
-if (!_rpc_status.ok()) {
-return _rpc_status;
-}
-
-{
-std::lock_guard lock(_backend_buffer_latch);
-std::swap(_front_buffer, _backend_buffer);
-}
-
-DCHECK_LE(length, _front_buffer.second);
-if (length > _front_buffer.second) {
-return Status::RpcError("auto inc sync result length > front 
buffer. " +
-std::to_string(length) + " vs " +
-std::to_string(_front_buffer.second));
-}
-result->emplace_back(_front_buffer.first, length);
-_front_buffer.first += length;
-_front_buffer.second -= length;
-}
-return Status::OK();
-}
-
-Status AutoIncIDBuffer::_prefetch_ids(size_t length) {
-if (_front_buffer.second > _low_water_level_mark() || _is_fetching) {
-return Status::OK();
-}
+Result AutoIncIDBuffer::_fetch_ids_from_fe(size_t length) {
+constexpr uint32_t FETCH_AUTOINC_MAX_RETRY_TIMES = 3;
+_rpc_status = Status::OK();
 TNetworkAddress master_addr = 
ExecEnv::GetInstance()->master_info()->network_address;
-_is_fetching = true;
-RETURN_IF_ERROR(_rpc_token->submit_func([=, this]() {
+for (uint32_t retry_times = 0; retry_times < 
FETCH_AUTOINC_MAX_RETRY_TIMES; retry_times++) {
 TAutoIncrementRangeRequest request;
 TAutoIncrementRangeResult result;
 request.__set_db_id(_db_id);
@@ -97,7 +55,7 @@ Status AutoIncIDBuffer::_prefetch_ids(size_t length) {
 request.__set_column_id(_column_id);
 request.__set_length(length);
 
-int64_t get_auto_inc_range_rpc_ns;
+int64_t get_auto_inc_range_rpc_ns = 0;
 {
 SCOPED_RAW_TIMER(&get_auto_inc_range_rpc_ns);
 _rpc_status = ThriftRpcHelper::rpc(
@@ -109,15 +67,95 @@ Status AutoIncIDBuffer::_prefetch_ids(size_t length) {
 LOG(INFO) << "[auto-inc-range][start=" << result.start << ",length=" 
<< result.length
   << "][elapsed=" << get_auto_inc_range_rpc_ns / 100 << " 
ms]";
 
-if (!_rpc_status.ok() || result.length <= 0) {
-LOG(WARNING) << "Failed to fetch auto-incremnt range, encounter 
rpc failure."
- << "errmsg=" << _rpc

(doris) 15/21: [bugfix](testcase)add java error log output (#35998)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 66ca7e94c834a0ebdd3ea9976c7685bd87d932d5
Author: wuwenchi 
AuthorDate: Fri Jun 28 21:56:43 2024 +0800

[bugfix](testcase)add java error log output (#35998)

In the test case pipeline, this spark-connector case fails about 4% of the
time, but since there is no error log, the cause of the failure cannot be
located. Therefore, error-log output is added to make future problems easier
to locate (see the sketch below).
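
The Groovy change below replaces `getText()` (which drops stderr and the exit code) with explicit capture of both streams and the exit value. The same idea in plain Java, as a hedged sketch (small outputs only; large outputs should be drained concurrently):

```
import java.io.IOException;
import java.nio.charset.StandardCharsets;

class RunAndLogSketch {
    public static void main(String[] args) throws IOException, InterruptedException {
        Process proc = new ProcessBuilder("java", "-version").start();
        String out = new String(proc.getInputStream().readAllBytes(), StandardCharsets.UTF_8);
        String err = new String(proc.getErrorStream().readAllBytes(), StandardCharsets.UTF_8);
        int code = proc.waitFor();
        if (code != 0) {
            // Log everything needed to diagnose the failure, not just stdout.
            System.err.printf("failed: code=%d, output=%s, error=%s%n", code, out, err);
        }
    }
}
```
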
---
 .../suites/connector_p0/spark_connector/spark_connector.groovy | 10 --
 .../connector_p0/spark_connector/spark_connector_arrow.groovy  | 10 --
 .../spark_connector/spark_connector_read_type.groovy   | 10 --
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git 
a/regression-test/suites/connector_p0/spark_connector/spark_connector.groovy 
b/regression-test/suites/connector_p0/spark_connector/spark_connector.groovy
index ecd4e6dfc14..2bd618fcc3c 100644
--- a/regression-test/suites/connector_p0/spark_connector/spark_connector.groovy
+++ b/regression-test/suites/connector_p0/spark_connector/spark_connector.groovy
@@ -28,7 +28,13 @@ suite("spark_connector", "connector") {
 logger.info("finish download spark doris demo ...")
 def run_cmd = "java -jar spark-doris-demo.jar 
$context.config.feHttpAddress $context.config.feHttpUser 
regression_test_connector_p0_spark_connector.$tableName"
 logger.info("run_cmd : $run_cmd")
-def run_spark_jar = run_cmd.execute().getText()
-logger.info("result: $run_spark_jar")
+def proc = run_cmd.execute()
+def sout = new StringBuilder()
+def serr = new StringBuilder()
+proc.consumeProcessOutput(sout, serr)
+proc.waitForOrKill(1200_000)
+if (proc.exitValue() != 0) {
+  logger.warn("failed to execute jar: code=${proc.exitValue()}, " + 
"output: ${sout.toString()}, error: ${serr.toString()}")
+}
 qt_select """ select * from $tableName order by order_id"""
 }
diff --git 
a/regression-test/suites/connector_p0/spark_connector/spark_connector_arrow.groovy
 
b/regression-test/suites/connector_p0/spark_connector/spark_connector_arrow.groovy
index 1cd2ed31d2e..a5fbc3b2835 100644
--- 
a/regression-test/suites/connector_p0/spark_connector/spark_connector_arrow.groovy
+++ 
b/regression-test/suites/connector_p0/spark_connector/spark_connector_arrow.groovy
@@ -136,8 +136,14 @@ suite("spark_connector_for_arrow", "connector") {
 logger.info("finish download spark doris demo ...")
 def run_cmd = "java -cp ${jar_name} 
org.apache.doris.spark.testcase.TestStreamLoadForArrowType 
$context.config.feHttpAddress $context.config.feHttpUser 
regression_test_connector_p0_spark_connector"
 logger.info("run_cmd : $run_cmd")
-def run_spark_jar = run_cmd.execute().getText()
-logger.info("result: $run_spark_jar")
+def proc = run_cmd.execute()
+def sout = new StringBuilder()
+def serr = new StringBuilder()
+proc.consumeProcessOutput(sout, serr)
+proc.waitForOrKill(1200_000)
+if (proc.exitValue() != 0) {
+  logger.warn("failed to execute jar: code=${proc.exitValue()}, " + 
"output: ${sout.toString()}, error: ${serr.toString()}")
+}
 
 qt_q01 """ select * from spark_connector_primitive """
 qt_q02 """ select * from spark_connector_array """
diff --git 
a/regression-test/suites/connector_p0/spark_connector/spark_connector_read_type.groovy
 
b/regression-test/suites/connector_p0/spark_connector/spark_connector_read_type.groovy
index 32a3ebf68c7..632e5e3d401 100644
--- 
a/regression-test/suites/connector_p0/spark_connector/spark_connector_read_type.groovy
+++ 
b/regression-test/suites/connector_p0/spark_connector/spark_connector_read_type.groovy
@@ -99,8 +99,14 @@ suite("spark_connector_read_type", "connector") {
 logger.info("finish download spark doris demo ...")
 def run_cmd = "java -jar spark-doris-read.jar 
$context.config.feHttpAddress $context.config.feHttpUser 
regression_test_connector_p0_spark_connector.$tableReadName 
regression_test_connector_p0_spark_connector.$tableWriterName"
 logger.info("run_cmd : $run_cmd")
-def run_spark_jar = run_cmd.execute().getText()
-logger.info("result: $run_spark_jar")
+def proc = run_cmd.execute()
+def sout = new StringBuilder()
+def serr = new StringBuilder()
+proc.consumeProcessOutput(sout, serr)
+proc.waitForOrKill(1200_000)
+if (proc.exitValue() != 0) {
+  logger.warn("failed to execute jar: code=${proc.exitValue()}, " + 
"output: ${sout.toString()}, error: ${serr.toString()}")
+}
 
 qt_select """ select * from $tableWriterName order by id"""
 





(doris) 21/21: [improvement](segmentcache) limit segment cache by memory or segment num (#37026)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 3df52cbf7610dc662ec525281b3a04eba360d219
Author: Yongqiang YANG <98214048+dataroar...@users.noreply.github.com>
AuthorDate: Sun Jun 30 12:57:14 2024 +0800

[improvement](segmentcache) limit segment cache by memory or segment num 
(#37026)

Also enlarge the estimated number of columns per segment (from 30 to 200).
---
 be/src/common/config.cpp   |  2 +-
 be/src/olap/lru_cache.cpp  |  5 +
 be/src/olap/lru_cache.h|  4 +++-
 be/src/olap/segment_loader.h   | 13 -
 be/src/runtime/exec_env_init.cpp   |  8 
 be/test/testutil/run_all_tests.cpp |  2 +-
 6 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 580793d36ab..9df75b97bd6 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1068,7 +1068,7 @@ DEFINE_mInt32(schema_cache_sweep_time_sec, "100");
 
 // max number of segment cache, default -1 for backward compatibility 
fd_number*2/5
 DEFINE_mInt32(segment_cache_capacity, "-1");
-DEFINE_mInt32(estimated_num_columns_per_segment, "30");
+DEFINE_mInt32(estimated_num_columns_per_segment, "200");
 DEFINE_mInt32(estimated_mem_per_column_reader, "1024");
 // The value is calculate by storage_page_cache_limit * 
index_page_cache_percentage
 DEFINE_mInt32(segment_cache_memory_percentage, "2");
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index 031082f6da8..741c2423915 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -22,6 +22,7 @@ namespace doris {
 
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_capacity, MetricUnit::BYTES);
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_usage, MetricUnit::BYTES);
+DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_element_count, MetricUnit::NOUNIT);
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_usage_ratio, MetricUnit::NOUNIT);
 DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_lookup_count, 
MetricUnit::OPERATIONS);
 DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_hit_count, MetricUnit::OPERATIONS);
@@ -542,6 +543,7 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, 
size_t total_capacity,
 _entity->register_hook(name, 
std::bind(&ShardedLRUCache::update_cache_metrics, this));
 INT_GAUGE_METRIC_REGISTER(_entity, cache_capacity);
 INT_GAUGE_METRIC_REGISTER(_entity, cache_usage);
+INT_GAUGE_METRIC_REGISTER(_entity, cache_element_count);
 INT_DOUBLE_METRIC_REGISTER(_entity, cache_usage_ratio);
 INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_lookup_count);
 INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_hit_count);
@@ -640,15 +642,18 @@ void ShardedLRUCache::update_cache_metrics() const {
 size_t total_usage = 0;
 size_t total_lookup_count = 0;
 size_t total_hit_count = 0;
+size_t total_element_count = 0;
 for (int i = 0; i < _num_shards; i++) {
 total_capacity += _shards[i]->get_capacity();
 total_usage += _shards[i]->get_usage();
 total_lookup_count += _shards[i]->get_lookup_count();
 total_hit_count += _shards[i]->get_hit_count();
+total_element_count += _shards[i]->get_element_count();
 }
 
 cache_capacity->set_value(total_capacity);
 cache_usage->set_value(total_usage);
+cache_element_count->set_value(total_element_count);
 cache_lookup_count->set_value(total_lookup_count);
 cache_hit_count->set_value(total_hit_count);
 cache_usage_ratio->set_value(total_capacity == 0 ? 0 : 
((double)total_usage / total_capacity));
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index 50676921044..059020deab5 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -60,7 +60,7 @@ enum LRUCacheType {
 };
 
 static constexpr LRUCacheType DEFAULT_LRU_CACHE_TYPE = LRUCacheType::SIZE;
-static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 16;
+static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 32;
 static constexpr size_t DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY = 0;
 
 class CacheKey {
@@ -349,6 +349,7 @@ public:
 uint64_t get_hit_count() const { return _hit_count; }
 size_t get_usage() const { return _usage; }
 size_t get_capacity() const { return _capacity; }
+size_t get_element_count() const { return _table.element_count(); }
 
 private:
 void _lru_remove(LRUHandle* e);
@@ -433,6 +434,7 @@ private:
 std::shared_ptr _entity;
 IntGauge* cache_capacity = nullptr;
 IntGauge* cache_usage = nullptr;
+IntGauge* cache_element_count = nullptr;
 DoubleGauge* cache_usage_ratio = nullptr;
 IntAtomicCounter* cache_lookup_count = nullptr;
 IntAtomicCounter* cache_hit_count = nullptr;
diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h
index 4d1f3f7a910..5bb8fae3c41 100644
--- a/be/src/olap/segment_loader.h
+++ b/be/src/olap/segment_loader.h
@@ -80,10 +80,11 @@ public:
   

(doris) 17/21: (cloud-merge) Get fileCacheSize by RPC always (#36857)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 9255c366ec145a2692bd79731b7974f0f1048aa4
Author: Lightman <31928846+lchangli...@users.noreply.github.com>
AuthorDate: Sat Jun 29 09:25:40 2024 +0800

(cloud-merge) Get fileCacheSize by RPC always (#36857)

Fetch fileCacheSize via RPC every time the warm-up job runs, instead of relying on a value cached on the Backend.
---
 .../apache/doris/cloud/CacheHotspotManager.java| 39 ++
 .../main/java/org/apache/doris/system/Backend.java | 10 --
 2 files changed, 18 insertions(+), 31 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java
index 304f76dee48..4f359446aad 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java
@@ -152,7 +152,6 @@ public class CacheHotspotManager extends MasterDaemon {
 TGetTopNHotPartitionsResponse resp = respPair.first;
 if (resp.isSetHotTables()) {
 resp.getHotTables().forEach((THotTableMessage hotTable) -> 
{
-
respPair.second.setfileCacheCapacityBytes(resp.file_cache_size);
 if (hotTable.isSetHotPartitions()) {
 hotTable.hot_partitions.forEach((THotPartition 
partition) -> {
 insertIntoTable(clusterToBeList.getKey(), 
hotTable.table_id,
@@ -337,26 +336,24 @@ public class CacheHotspotManager extends MasterDaemon {
 .getBackendsByClusterName(clusterName);
 Long totalFileCache = 0L;
 for (Backend backend : backends) {
-Long fileCacheSize = backend.getfileCacheCapactiyBytes();
-if (fileCacheSize == 0) {
-boolean ok = false;
-BackendService.Client client = null;
-TNetworkAddress address = null;
-try {
-address = new TNetworkAddress(backend.getHost(), 
backend.getBePort());
-client = ClientPool.backendPool.borrowObject(address);
-TGetTopNHotPartitionsResponse resp = 
client.getTopNHotPartitions(
-new TGetTopNHotPartitionsRequest());
-fileCacheSize = resp.file_cache_size;
-ok = true;
-} catch (Exception e) {
-throw new RuntimeException(e);
-} finally {
-if (ok) {
-ClientPool.backendPool.returnObject(address, client);
-} else {
-ClientPool.backendPool.invalidateObject(address, 
client);
-}
+Long fileCacheSize = 0L;
+boolean ok = false;
+BackendService.Client client = null;
+TNetworkAddress address = null;
+try {
+address = new TNetworkAddress(backend.getHost(), 
backend.getBePort());
+client = ClientPool.backendPool.borrowObject(address);
+TGetTopNHotPartitionsResponse resp = 
client.getTopNHotPartitions(
+new TGetTopNHotPartitionsRequest());
+fileCacheSize = resp.file_cache_size;
+ok = true;
+} catch (Exception e) {
+throw new RuntimeException(e);
+} finally {
+if (ok) {
+ClientPool.backendPool.returnObject(address, client);
+} else {
+ClientPool.backendPool.invalidateObject(address, client);
 }
 }
 totalFileCache += fileCacheSize;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java 
b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java
index 902b90843b6..a366aca5d6b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java
@@ -148,8 +148,6 @@ public class Backend implements Writable {
 // send some queries to this BE, it is not an important problem.
 private AtomicBoolean isShutDown = new AtomicBoolean(false);
 
-private long fileCacheCapactiyBytes = 0;
-
 public Backend() {
 this.host = "";
 this.version = "";
@@ -241,14 +239,6 @@ public class Backend implements Writable {
 return heartbeatPort;
 }
 
-public void setfileCacheCapacityBytes(long fileCacheCapactiyBytes) {
-this.fileCacheCapactiyBytes = fileCacheCapactiyBytes;
-}
-
-public long getfileCacheCapactiyBytes() {
-return fileCacheCapactiyBytes;
-}
-
 public int getHttpPort() {
 return httpPort;
 }


--

(doris) 12/21: [fix](statistics)Escape fetch partition stats sql. (#36941)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 74eaf05cb855d4b3a1c0b2b18082839a503715da
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Fri Jun 28 20:51:52 2024 +0800

[fix](statistics)Escape fetch partition stats sql. (#36941)

The SQL used to fetch partition stats needs to be escaped (a minimal escaping sketch follows).
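
Partition and column names are interpolated into the statistics SQL inside single quotes, so a name containing a quote or backslash would break the statement. A hedged sketch of the kind of escaping involved (the real helper is `StatisticsUtil.escapeSQL`; its exact rules may differ):

```
class EscapeSketch {
    // Minimal illustration: double up backslashes and single quotes before
    // wrapping a value in '...' for interpolation.
    static String escapeSql(String raw) {
        return raw.replace("\\", "\\\\").replace("'", "''");
    }

    public static void main(String[] args) {
        String partName = "p_2024_o'brien";
        System.out.println("part_name IN ('" + escapeSql(partName) + "')");
        // prints: part_name IN ('p_2024_o''brien')
    }
}
```
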
---
 .../apache/doris/statistics/AnalysisManager.java   |  2 +-
 .../PartitionColumnStatisticCacheLoader.java   |  2 +-
 .../doris/statistics/StatisticsRepository.java |  6 ++---
 .../suites/statistics/test_partition_stats.groovy  | 27 ++
 4 files changed, 32 insertions(+), 5 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
index 22bc11971c2..1e95adf1714 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java
@@ -788,7 +788,7 @@ public class AnalysisManager implements Writable {
 StringBuilder partNamePredicate = new StringBuilder();
 while (iterator.hasNext()) {
 partNamePredicate.append("'");
-partNamePredicate.append(iterator.next());
+
partNamePredicate.append(StatisticsUtil.escapeSQL(iterator.next()));
 partNamePredicate.append("'");
 partNamePredicate.append(",");
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java
index ac807a0d5f7..c365f6b1a74 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/PartitionColumnStatisticCacheLoader.java
@@ -57,7 +57,7 @@ public class PartitionColumnStatisticCacheLoader extends
 private Optional 
loadFromPartitionStatsTable(PartitionColumnStatisticCacheKey key) {
 List partitionResults;
 try {
-String partName = "'" + key.partId + "'";
+String partName = "'" + StatisticsUtil.escapeSQL(key.partId) + "'";
 partitionResults = StatisticsRepository.loadPartitionColumnStats(
 key.catalogId, key.dbId, key.tableId, key.idxId, partName, 
key.colName);
 } catch (InternalQueryExecutionException e) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
index bbcc9de3a28..81192762f99 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java
@@ -144,12 +144,12 @@ public class StatisticsRepository {
 params.put("tableId", String.valueOf(table.getId()));
 StringJoiner sj = new StringJoiner(",");
 for (String colName : columnNames) {
-sj.add("'" + colName + "'");
+sj.add("'" + StatisticsUtil.escapeSQL(colName) + "'");
 }
 params.put("columnInfo", sj.toString());
 sj = new StringJoiner(",");
 for (String part : partitionNames) {
-sj.add("'" + part + "'");
+sj.add("'" + StatisticsUtil.escapeSQL(part) + "'");
 }
 params.put("partitionInfo", sj.toString());
 return 
StatisticsUtil.executeQuery(FETCH_PARTITIONS_STATISTIC_TEMPLATE, params);
@@ -418,7 +418,7 @@ public class StatisticsRepository {
 params.put("tableId", String.valueOf(tableId));
 params.put("indexId", String.valueOf(idxId));
 params.put("partName", partName);
-params.put("columnId", colName);
+params.put("columnId", StatisticsUtil.escapeSQL(colName));
 return StatisticsUtil.execStatisticQuery(new StringSubstitutor(params)
 .replace(FETCH_PARTITION_STATISTIC_TEMPLATE));
 }
diff --git a/regression-test/suites/statistics/test_partition_stats.groovy 
b/regression-test/suites/statistics/test_partition_stats.groovy
index 7658d50c47e..05216477323 100644
--- a/regression-test/suites/statistics/test_partition_stats.groovy
+++ b/regression-test/suites/statistics/test_partition_stats.groovy
@@ -928,6 +928,33 @@ suite("test_partition_stats") {
 assertEquals("1", result[0][7])
 assertEquals("20004", result[0][8])
 
+// Test escape special col name.
+sql """
+create table part9(
+k int null,
+v variant null
+)
+duplicate key (k)
+PARTITION BY RANGE(`k`)
+(
+PARTITION p1 VALUES [("0"), ("2")),
+PARTITION p2 VALUES [("2"), ("4")),
+PARTITION p3 VALUES [("4"), 

(doris) 11/21: [improvement](jdbc catalog) Modify the maximum number of connections in the connection pool to 30 by default (#36720)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 1cbc322d0c548d04cde2dd52ed7a462072a36af4
Author: zy-kkk 
AuthorDate: Fri Jun 28 20:13:34 2024 +0800

[improvement](jdbc catalog) Modify the maximum number of connections in the 
connection pool to 30 by default (#36720)

In many cases, we found that users use the JDBC Catalog to run a large
number of queries, which made the previous maximum of 10 connections
insufficient, so I adjusted it to 30, which covers most needs.
---
 .../src/main/java/org/apache/doris/jdbc/JdbcDataSourceConfig.java | 2 +-
 fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java   | 2 +-
 .../src/test/java/org/apache/doris/catalog/JdbcResourceTest.java  | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git 
a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcDataSourceConfig.java
 
b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcDataSourceConfig.java
index 5fdbc211ab0..a99377add25 100644
--- 
a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcDataSourceConfig.java
+++ 
b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcDataSourceConfig.java
@@ -31,7 +31,7 @@ public class JdbcDataSourceConfig {
 private TJdbcOperation op;
 private TOdbcTableType tableType;
 private int connectionPoolMinSize = 1;
-private int connectionPoolMaxSize = 10;
+private int connectionPoolMaxSize = 30;
 private int connectionPoolMaxWaitTime = 5000;
 private int connectionPoolMaxLifeTime = 180;
 private boolean connectionPoolKeepAlive = false;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java
index 3878500f917..1db801b024a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java
@@ -140,7 +140,7 @@ public class JdbcResource extends Resource {
 OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(INCLUDE_DATABASE_LIST, "");
 OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(EXCLUDE_DATABASE_LIST, "");
 OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(CONNECTION_POOL_MIN_SIZE, "1");
-OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(CONNECTION_POOL_MAX_SIZE, "10");
+OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(CONNECTION_POOL_MAX_SIZE, "30");
 OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(CONNECTION_POOL_MAX_LIFE_TIME, 
"180");
 OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(CONNECTION_POOL_MAX_WAIT_TIME, 
"5000");
 OPTIONAL_PROPERTIES_DEFAULT_VALUE.put(CONNECTION_POOL_KEEP_ALIVE, 
"false");
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/catalog/JdbcResourceTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/catalog/JdbcResourceTest.java
index 8e004d4b236..81c2157686a 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/JdbcResourceTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/JdbcResourceTest.java
@@ -87,7 +87,7 @@ public class JdbcResourceTest {
 // Verify the default properties were applied during the replay
 Map properties = jdbcResource.getCopiedProperties();
 Assert.assertEquals("1", properties.get("connection_pool_min_size"));
-Assert.assertEquals("10", properties.get("connection_pool_max_size"));
+Assert.assertEquals("30", properties.get("connection_pool_max_size"));
 Assert.assertEquals("180", 
properties.get("connection_pool_max_life_time"));
 Assert.assertEquals("5000", 
properties.get("connection_pool_max_wait_time"));
 Assert.assertEquals("false", 
properties.get("connection_pool_keep_alive"));
@@ -110,7 +110,7 @@ public class JdbcResourceTest {
 // Verify the default properties were applied during the replay
 Map properties = 
replayedResource.getCopiedProperties();
 Assert.assertEquals("1", properties.get("connection_pool_min_size"));
-Assert.assertEquals("10", properties.get("connection_pool_max_size"));
+Assert.assertEquals("30", properties.get("connection_pool_max_size"));
 Assert.assertEquals("180", 
properties.get("connection_pool_max_life_time"));
 Assert.assertEquals("5000", 
properties.get("connection_pool_max_wait_time"));
 Assert.assertEquals("false", 
properties.get("connection_pool_keep_alive"));





(doris) 09/21: [enhancement](cloud) batching get visible version from MetaService (#34615)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit c0591389cc706ea46dac72438e48e1ab42bd587e
Author: zhengyu 
AuthorDate: Fri Jun 28 19:05:58 2024 +0800

[enhancement](cloud) batching get visible version from MetaService (#34615)

Getting visible versions one by one from the MetaService is costly. Batching
them into one RPC not only reduces the workload on the RPC service but also
reduces the lag (see the sketch below).
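
Conceptually, the change trades one RPC per partition for a single batched RPC whose response carries a version (and update time) per requested partition. A hedged Java sketch of the calling pattern, with hypothetical client types (the real code lives in the CloudPartition/MetaService paths shown in the diff):

```
import java.util.List;
import java.util.Map;

class BatchVersionSketch {
    interface MetaServiceClient {
        // One RPC returning one version per partition id, -1 when missing.
        List<Long> batchGetVisibleVersions(List<Long> partitionIds);
    }

    // Before: one round trip per partition. After: a single round trip per batch.
    static void refreshVersions(MetaServiceClient client, List<Long> partitionIds,
                                Map<Long, Long> versionCache) {
        List<Long> versions = client.batchGetVisibleVersions(partitionIds);
        for (int i = 0; i < partitionIds.size(); i++) {
            versionCache.put(partitionIds.get(i), versions.get(i));
        }
    }
}
```
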
---
 cloud/src/meta-service/meta_service.cpp|  3 +++
 cloud/src/meta-service/meta_service_txn.cpp|  6 +-
 .../java/org/apache/doris/catalog/OlapTable.java   | 15 ++
 .../java/org/apache/doris/catalog/Partition.java   |  7 +++
 .../apache/doris/cloud/catalog/CloudPartition.java | 24 --
 .../cloud/datasource/CloudInternalCatalog.java |  4 
 .../transaction/CloudGlobalTransactionMgr.java |  4 +++-
 .../doris/common/NereidsSqlCacheManager.java   |  9 
 .../org/apache/doris/nereids/SqlCacheContext.java  |  1 -
 .../org/apache/doris/qe/cache/CacheAnalyzer.java   | 11 ++
 gensrc/proto/cloud.proto   |  3 +++
 11 files changed, 66 insertions(+), 21 deletions(-)

diff --git a/cloud/src/meta-service/meta_service.cpp 
b/cloud/src/meta-service/meta_service.cpp
index b234a48d88a..0bb1fb5f277 100644
--- a/cloud/src/meta-service/meta_service.cpp
+++ b/cloud/src/meta-service/meta_service.cpp
@@ -269,6 +269,7 @@ void 
MetaServiceImpl::get_version(::google::protobuf::RpcController* controller,
 return;
 }
 response->set_version(version_pb.version());
+response->add_version_update_time_ms(version_pb.update_time_ms());
 }
 { TEST_SYNC_POINT_CALLBACK("get_version_code", &code); }
 return;
@@ -373,6 +374,7 @@ void 
MetaServiceImpl::batch_get_version(::google::protobuf::RpcController* contr
 if (!value.has_value()) {
 // return -1 if the target version is not exists.
 response->add_versions(-1);
+response->add_version_update_time_ms(-1);
 } else if (is_table_version) {
 int64_t version = 0;
 if (!txn->decode_atomic_int(*value, &version)) {
@@ -389,6 +391,7 @@ void 
MetaServiceImpl::batch_get_version(::google::protobuf::RpcController* contr
 break;
 }
 response->add_versions(version_pb.version());
+
response->add_version_update_time_ms(version_pb.update_time_ms());
 }
 }
 }
diff --git a/cloud/src/meta-service/meta_service_txn.cpp 
b/cloud/src/meta-service/meta_service_txn.cpp
index 729696f8e2f..69fecc80788 100644
--- a/cloud/src/meta-service/meta_service_txn.cpp
+++ b/cloud/src/meta-service/meta_service_txn.cpp
@@ -1046,10 +1046,14 @@ void 
MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller,
 }
 
 // Save versions
+int64_t version_update_time_ms =
+
duration_cast(system_clock::now().time_since_epoch()).count();
+response->set_version_update_time_ms(version_update_time_ms);
 for (auto& i : new_versions) {
 std::string ver_val;
 VersionPB version_pb;
 version_pb.set_version(i.second);
+version_pb.set_update_time_ms(version_update_time_ms);
 if (!version_pb.SerializeToString(&ver_val)) {
 code = MetaServiceCode::PROTOBUF_SERIALIZE_ERR;
 ss << "failed to serialize version_pb when saving, txn_id=" << 
txn_id;
@@ -1059,7 +1063,7 @@ void 
MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller,
 
 txn->put(i.first, ver_val);
 LOG(INFO) << "xxx put partition_version_key=" << hex(i.first) << " 
version:" << i.second
-  << " txn_id=" << txn_id;
+  << " txn_id=" << txn_id << " update_time=" << 
version_update_time_ms;
 
 std::string_view ver_key = i.first;
 ver_key.remove_prefix(1); // Remove key space
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index f5e3cc90e85..d5b1c258c5d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -1149,6 +1149,21 @@ public class OlapTable extends Table implements 
MTMVRelatedTableIf, GsonPostProc
 return partition;
 }
 
+public void getVersionInBatchForCloudMode(Collection partitionIds) {
+if (Config.isCloudMode()) { // do nothing for non-cloud mode
+List partitions = partitionIds.stream()
+.sorted()
+.map(this::getPartition)
+.map(partition -> (Cloud

(doris) 06/21: [Fix](variant) ignore serialization of nothing type (#36997)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 15f162f69054ff62464f3cf1710ea6114fdbf438
Author: lihangyu <15605149...@163.com>
AuthorDate: Fri Jun 28 18:23:35 2024 +0800

[Fix](variant) ignore serialization of nothing type (#36997)

1. fix: variant should not serialize the Nothing type (see the sketch below)
2. fix unstable test cases
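
The serialize path now remembers the buffer position reserved for the subcolumn
count, skips Nothing-typed subcolumns inside the loop, and backfills the real
count at the end. A simplified, self-contained sketch of that pattern
(hypothetical types, not the actual DataTypeObject code):

```
#include <cstdint>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

struct SubColumn {
    std::string name;
    bool is_nothing; // stand-in for is_nothing(get_least_common_type())
};

std::vector<char> serialize(const std::vector<SubColumn>& subcolumns) {
    std::vector<char> buf(sizeof(uint32_t)); // reserve the count slot up front
    uint32_t num_of_columns = 0;
    for (const auto& c : subcolumns) {
        if (c.is_nothing) {
            continue; // ignore serialization of the Nothing type
        }
        ++num_of_columns;
        buf.insert(buf.end(), c.name.begin(), c.name.end());
        buf.push_back('\0');
    }
    // backfill the real number of serialized subcolumns into the reserved slot
    std::memcpy(buf.data(), &num_of_columns, sizeof(num_of_columns));
    return buf;
}

int main() {
    auto bytes = serialize({{"v.a", false}, {"v.b", true}, {"v.c", false}});
    uint32_t n = 0;
    std::memcpy(&n, bytes.data(), sizeof(n));
    std::cout << "serialized subcolumns: " << n << "\n"; // prints 2
    return 0;
}
```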
---
 be/src/vec/data_types/data_type_object.cpp   | 14 +++---
 regression-test/data/variant_p0/load.out | 12 ++--
 .../data/variant_p0/test_sub_path_pruning.out| 10 +-
 regression-test/suites/variant_p0/load.groovy|  8 
 .../suites/variant_p0/test_sub_path_pruning.groovy   | 20 +++-
 5 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/be/src/vec/data_types/data_type_object.cpp 
b/be/src/vec/data_types/data_type_object.cpp
index 7adc4c17f56..7a75583cd7b 100644
--- a/be/src/vec/data_types/data_type_object.cpp
+++ b/be/src/vec/data_types/data_type_object.cpp
@@ -63,6 +63,9 @@ int64_t 
DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column,
 size += sizeof(uint32_t);
 for (const auto& entry : subcolumns) {
 auto type = entry->data.get_least_common_type();
+if (is_nothing(type)) {
+continue;
+}
 
 PColumnMeta column_meta_pb;
 column_meta_pb.set_name(entry->path.get_path());
@@ -91,15 +94,18 @@ char* DataTypeObject::serialize(const IColumn& column, 
char* buf, int be_exec_ve
 
 const auto& subcolumns = column_object.get_subcolumns();
 
-// 1. serialize num of subcolumns
-*reinterpret_cast<uint32_t*>(buf) = subcolumns.size();
+char* size_pos = buf;
 buf += sizeof(uint32_t);
 
+size_t num_of_columns = 0;
 // 2. serialize each subcolumn in a loop
 for (const auto& entry : subcolumns) {
 // 2.1 serialize subcolumn column meta pb (path and type)
 auto type = entry->data.get_least_common_type();
-
+if (is_nothing(type)) {
+continue;
+}
+++num_of_columns;
 PColumnMeta column_meta_pb;
 column_meta_pb.set_name(entry->path.get_path());
 type->to_pb_column_meta(&column_meta_pb);
@@ -113,6 +119,8 @@ char* DataTypeObject::serialize(const IColumn& column, 
char* buf, int be_exec_ve
 // 2.2 serialize subcolumn
 buf = type->serialize(entry->data.get_finalized_column(), buf, 
be_exec_version);
 }
+// serialize num of subcolumns
+*reinterpret_cast<uint32_t*>(size_pos) = num_of_columns;
 
 return buf;
 }
diff --git a/regression-test/data/variant_p0/load.out 
b/regression-test/data/variant_p0/load.out
index ab83a86cfb3..954faffccb0 100644
--- a/regression-test/data/variant_p0/load.out
+++ b/regression-test/data/variant_p0/load.out
@@ -79,11 +79,11 @@
 -- !sql --
 {"c":"123"}
 {"c":123}
-{"cc":[123.0]}
+{"cc":[123.2]}
 {"cc":[123.1]}
 {"ccc":123}
 {"ccc":123321}
-{"":123.0}
+{"":123.22}
 {"":123.11}
 {"c":[123]}
 {"c":[123456789]}
@@ -123,7 +123,7 @@
 1.101111800
 1. 17211
 \N 123456
-123191191
+123.22 191191
 \N 123456789101112
 
 -- !sql_7 --
@@ -155,7 +155,7 @@
 123{"A":123}
 123456 {"A":123456}
 123456789101112{"A":123456789101112}
-191191 {"A":191191,"a":123.0,"c":123}
+191191 {"A":191191,"a":123.22,"c":123}
 1800   {"A":1800,"a":1.10111,"c":[12345]}
 17211  {"A":17211,"a":1.,"c":11}
 
@@ -175,7 +175,7 @@
 \N 123456789101112 {"A":123456789101112}   \N
 \N \N  {"AA":[123456]} \N
 \N \N  {"AA":[123456789101112]}\N
-123191191  {"A":191191,"a":123.0,"c":123}  \N
+123.22 191191  {"A":191191,"a":123.22,"c":123} \N
 123\N  {"a":"123","c":123456}  \N
 1.101111800{"A":1800,"a":1.10111,"c":[12345]}  \N
 1. 17211   {"A":17211,"a":1.,"c":11}   \N
@@ -212,7 +212,7 @@
 [123]
 
 -- !sql_25 --
-5  54999.862   615
+5  54999.9935  615
 
 -- !sql_26 --
 5000
diff --git a/regression-test/data/variant_p0/test_sub_path_pruning.out 
b/regression-test/data/variant_p0/test_sub_path_pruning.out
index a48bc550d00..16328739167 100644
--- a/regression-test/data/variant_p0/test_sub_path_pruning.out
+++ b/regression-test/data/variant_p0/test_sub_path_pruning.out
@@ -233,11 +233,7 @@
 {"b":{"c":{"d":{"e":11}}},"c":{"d":{"e":12}},"d":{"e":13},"e":14}
 
 -- !sql --
-
-{"c":{"d":{"e":11}}}
-
--- !sql --
-""
+1
 {"c":{"d":{"e":11}}}
 
 -- !sql --
@@ -252,10 +248,6 @@
 \N
 
 
--- !sql --
-""
-{"e":11}
-
 -- !sql --
 1  1
 2  1
diff --git a/regression-test/suites/variant_p0/load.groovy 
b/regression-test/suites/variant_p0/load.groovy
index 4103a2b34fa..cbd6bc1178c 100644
--- a/regression-test/suites/variant_p0/load.groovy
+++ b/regression-test/suites/variant_p0/load.groovy
@@ -93,7 +93,7 @@ suite("regression_test_variant", "nonConcurrent"){
   

(doris) 08/21: [chore](upgrade) turn off fallback_to_original_planner when upgrade (#37005)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit fee6a24ef645c6db7cefeffe6b96c4496e4054ae
Author: morrySnow <101034200+morrys...@users.noreply.github.com>
AuthorDate: Fri Jun 28 18:56:32 2024 +0800

[chore](upgrade) turn off fallback_to_original_planner when upgrade (#37005)

Upgrading from 2.1 or an earlier version to 3.0.x will set:

- enable_nereids_planner to true
- enable_nereids_dml to true
- enable_fallback_to_original_planner to false
---
 fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
index a3da3482f77..cda6f3b4602 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
@@ -1590,8 +1590,15 @@ public class Env {
 SessionVariable.NEREIDS_TIMEOUT_SECOND, "30");
 }
 }
-if (journalVersion <= FeMetaVersion.VERSION_133) {
-VariableMgr.refreshDefaultSessionVariables("2.0 to 2.1",
+if (journalVersion <= FeMetaVersion.VERSION_129) {
+VariableMgr.refreshDefaultSessionVariables("2.1 to 3.0", 
SessionVariable.ENABLE_NEREIDS_PLANNER,
+"true");
+VariableMgr.refreshDefaultSessionVariables("2.1 to 3.0", 
SessionVariable.ENABLE_NEREIDS_DML,
+"true");
+VariableMgr.refreshDefaultSessionVariables("2.1 to 3.0",
+
SessionVariable.ENABLE_FALLBACK_TO_ORIGINAL_PLANNER,
+"false");
+VariableMgr.refreshDefaultSessionVariables("2.1 to 3.0",
 SessionVariable.ENABLE_MATERIALIZED_VIEW_REWRITE,
 "true");
 }





(doris) 20/21: [fix](cgroup memory) Correct cgroup mem info cache (#36966)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 848fda6afd31e94f906ddba90625fc1d6e391cf2
Author: Hongkun Xu 
AuthorDate: Sun Jun 30 12:56:14 2024 +0800

[fix](cgroup memory) Correct cgroup mem info cache (#36966)

## Proposed changes
After upgrading to Doris 2.1.3, we noticed that the "sys available
memory" in be.INFO continuously decreases until it falls below the
warning water mark, leading to persistent garbage collection (GC)
despite the actual memory usage being very low. Also, the cache in
cgroup mem info is always 0. Consequently, I identified an error in the
calculation of available memory from the cgroup memory info:

1. The memory information for cgroup memory is stored in the file
"memory.stat" rather than "memory.meminfo" (in fact, the
"memory.meminfo" file does not exist). You can see the files under the
cgroup path in the screenshots attached below.
2. The output content of "memory.stat" is shown in the screenshots
below.

Screenshots:
https://github.com/apache/doris/assets/38196564/e654322e-9bf4-4f5e-951f-99e101ebbf47
https://github.com/apache/doris/assets/38196564/02cf8899-7618-4d5f-bf59-68fa0c90ebf2



My change involves two steps:
1. Modified the file name for mem info in cgroup.
2. Modified the process for extracting the cache from cgroup (a minimal
parsing sketch is shown below).
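
A minimal, self-contained sketch of the corrected approach, assuming
simplified stand-in helpers rather than the real MemInfo/CGroupUtil code:
cgroup v1 "memory.stat" holds plain "key value" lines (keys such as "cache"
carry no trailing colon, unlike "/proc/meminfo"), and the page cache is
subtracted from the reported usage:

```
#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>

// parse "key value" lines as found in cgroup v1 memory.stat
std::unordered_map<std::string, int64_t> parse_memory_stat(std::istream& in) {
    std::unordered_map<std::string, int64_t> out;
    std::string key;
    int64_t value = 0;
    while (in >> key >> value) {
        out[key] = value; // no trailing ':' to strip, unlike /proc/meminfo
    }
    return out;
}

int main() {
    // sample content; in the BE this would come from <cgroup path>/memory.stat
    std::istringstream memory_stat("cache 104857600\nrss 524288000\n");
    auto stat = parse_memory_stat(memory_stat);

    int64_t usage_in_bytes = 734003200; // e.g. value of memory.usage_in_bytes
    // the reported usage includes the page cache, so subtract it
    std::cout << "cgroup mem usage: " << usage_in_bytes - stat["cache"] << "\n";
    return 0;
}
```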

Co-authored-by: Xinyi Zou 
---
 be/src/util/cgroup_util.cpp | 2 +-
 be/src/util/mem_info.cpp| 8 
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/be/src/util/cgroup_util.cpp b/be/src/util/cgroup_util.cpp
index a2c3e294e66..9ad78696a6f 100644
--- a/be/src/util/cgroup_util.cpp
+++ b/be/src/util/cgroup_util.cpp
@@ -184,7 +184,7 @@ Status CGroupUtil::find_cgroup_mem_info(std::string* 
file_path) {
 }
 string cgroup_path;
 RETURN_IF_ERROR(find_abs_cgroup_path("memory", &cgroup_path));
-*file_path = cgroup_path + "/memory.meminfo";
+*file_path = cgroup_path + "/memory.stat";
 return Status::OK();
 }
 
diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp
index fc5d5512f1a..45e609d7100 100644
--- a/be/src/util/mem_info.cpp
+++ b/be/src/util/mem_info.cpp
@@ -154,7 +154,7 @@ void MemInfo::refresh_proc_meminfo() {
 if (fields.size() < 2) {
 continue;
 }
-std::string key = fields[0].substr(0, fields[0].size() - 1);
+std::string key = fields[0].substr(0, fields[0].size());
 
 StringParser::ParseResult result;
 auto mem_value = 
StringParser::string_to_int(fields[1].data(),
@@ -180,19 +180,19 @@ void MemInfo::refresh_proc_meminfo() {
 // 
https://serverfault.com/questions/902009/the-memory-usage-reported-in-cgroup-differs-from-the-free-command
 // memory.usage_in_bytes ~= free.used + free.(buff/cache) - (buff)
 // so, memory.usage_in_bytes - memory.meminfo["Cached"]
-_s_cgroup_mem_usage = cgroup_mem_usage - 
_s_cgroup_mem_info_bytes["Cached"];
+_s_cgroup_mem_usage = cgroup_mem_usage - 
_s_cgroup_mem_info_bytes["cache"];
 // wait 10s, 100 * 100ms, avoid too frequently.
 _s_cgroup_mem_refresh_wait_times = -100;
 LOG(INFO) << "Refresh cgroup memory win, refresh again after 10s, 
cgroup mem limit: "
   << _s_cgroup_mem_limit << ", cgroup mem usage: " << 
_s_cgroup_mem_usage
-  << ", cgroup mem info cached: " << 
_s_cgroup_mem_info_bytes["Cached"];
+  << ", cgroup mem info cached: " << 
_s_cgroup_mem_info_bytes["cache"];
 } else {
 // find cgroup failed, wait 300s, 1000 * 100ms.
 _s_cgroup_mem_refresh_wait_times = -3000;
 LOG(INFO)
 << "Refresh cgroup memory failed, refresh again after 
300s, cgroup mem limit: "
 << _s_cgroup_mem_limit << ", cgroup mem usage: " << 
_s_cgroup_mem_usage
-<< ", cgroup mem info cached: " << 
_s_cgroup_mem_info_bytes["Cached"];
+<< ", cgroup mem info cached: " << 
_s_cgroup_mem_info_bytes["cache"];
 }
 } else {
 if (config::enable_use_cgroup_memory_info) {





(doris) 07/21: [chore](profile) rm useless code of profile (#36915)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 13e2a19bbb59eb4532320ee6085b0f70e844c9cd
Author: zhiqiang 
AuthorDate: Fri Jun 28 18:44:06 2024 +0800

[chore](profile) rm useless code of profile (#36915)
---
 .../doris/common/profile/ExecutionProfile.java | 23 --
 1 file changed, 23 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/profile/ExecutionProfile.java
 
b/fe/fe-core/src/main/java/org/apache/doris/common/profile/ExecutionProfile.java
index 3c683fced9d..ebe41c1146f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/common/profile/ExecutionProfile.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/common/profile/ExecutionProfile.java
@@ -76,8 +76,6 @@ public class ExecutionProfile {
 private Map fragmentProfiles;
 // Profile for load channels. Only for load job.
 private RuntimeProfile loadChannelProfile;
-// FragmentId -> InstanceId -> RuntimeProfile
-private Map> 
fragmentInstancesProfiles;
 
 // use to merge profile from multi be
 private Map>> 
multiBeProfile = null;
@@ -85,8 +83,6 @@ public class ExecutionProfile {
 // Not serialize this property, it is only used to get profile id.
 private SummaryProfile summaryProfile;
 
-// BE only has instance id, does not have fragmentid, so should use this 
map to find fragmentid.
-private Map instanceIdToFragmentId;
 private Map fragmentIdBeNum;
 private Map seqNoToFragmentId;
 
@@ -112,8 +108,6 @@ public class ExecutionProfile {
 }
 loadChannelProfile = new RuntimeProfile("LoadChannels");
 root.addChild(loadChannelProfile);
-fragmentInstancesProfiles = Maps.newHashMap();
-instanceIdToFragmentId = Maps.newHashMap();
 }
 
 private List> getMultiBeProfile(int fragmentId) {
@@ -299,23 +293,6 @@ public class ExecutionProfile {
 
multiBeProfile.get(params.fragment_id).put(backend.getHeartbeatAddress(), 
taskProfile);
 }
 
-// MultiInstances may update the profile concurrently
-public synchronized void addInstanceProfile(PlanFragmentId fragmentId, 
TUniqueId instanceId,
-RuntimeProfile instanceProfile) {
-Map instanceProfiles = 
fragmentInstancesProfiles.get(fragmentId);
-if (instanceProfiles == null) {
-instanceProfiles = Maps.newHashMap();
-fragmentInstancesProfiles.put(fragmentId, instanceProfiles);
-}
-RuntimeProfile existingInstanceProfile = 
instanceProfiles.get(instanceId);
-if (existingInstanceProfile == null) {
-instanceProfiles.put(instanceId, instanceProfile);
-instanceIdToFragmentId.put(instanceId, fragmentId);
-fragmentProfiles.get(fragmentId.asInt()).addChild(instanceProfile);
-return;
-}
-}
-
 public synchronized void addFragmentBackend(PlanFragmentId fragmentId, 
Long backendId) {
 fragmentIdBeNum.put(fragmentId.asInt(), 
fragmentIdBeNum.get(fragmentId.asInt()) + 1);
 }





(doris) branch master updated: [Fix](autoinc) Handle the processing of auto_increment column on exchange node rather than on TabletWriter when using `TABLET_SINK_SHUFFLE_PARTITIONED` (#36836)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new c73391b75d0 [Fix](autoinc) Handle the processing of auto_increment 
column on exchange node rather than on TabletWriter when using 
`TABLET_SINK_SHUFFLE_PARTITIONED` (#36836)
c73391b75d0 is described below

commit c73391b75d03aca3646f7cf6dcdd091668f89c13
Author: bobhan1 
AuthorDate: Sun Jun 30 13:07:27 2024 +0800

[Fix](autoinc) Handle the processing of auto_increment column on exchange 
node rather than on TabletWriter when using `TABLET_SINK_SHUFFLE_PARTITIONED` 
(#36836)

## Proposed changes

Issue Number: close #36638

https://github.com/apache/doris/pull/30914 added partition tablet sink
shuffle, so the processing of the auto_increment column should be handled
on the exchange node rather than on the TabletWriter when partition tablet
sink shuffle is used.

branch-2.1-pick: https://github.com/apache/doris/pull/37029
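
For illustration only, here is a minimal sketch of the fill step itself, using
hypothetical simplified types rather than the real OlapTableBlockConvertor:
NULL values in the auto_increment column are replaced with ids taken from a
pre-fetched range. With `TABLET_SINK_SHUFFLE_PARTITIONED`, this fill now runs
on the exchange node before the rows are shuffled to the tablet writers.

```
#include <cstdint>
#include <iostream>
#include <optional>
#include <vector>

// Replace each missing (NULL) value with the next id from a pre-fetched range.
std::vector<int64_t> fill_auto_inc(const std::vector<std::optional<int64_t>>& src,
                                   int64_t& next_id) {
    std::vector<int64_t> out;
    out.reserve(src.size());
    for (const auto& v : src) {
        out.push_back(v.has_value() ? *v : next_id++);
    }
    return out;
}

int main() {
    int64_t next_id = 1000; // start of an id range fetched in advance
    std::vector<std::optional<int64_t>> col = {std::nullopt, 5, std::nullopt};
    for (int64_t v : fill_auto_inc(col, next_id)) {
        std::cout << v << " "; // prints: 1000 5 1001
    }
    std::cout << "\n";
    return 0;
}
```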
---
 be/src/pipeline/exec/exchange_sink_operator.cpp|  5 +-
 be/src/vec/sink/vtablet_block_convertor.cpp|  5 +-
 be/src/vec/sink/writer/vtablet_writer.cpp  |  2 +
 be/src/vec/sink/writer/vtablet_writer_v2.cpp   |  2 +
 .../unique/test_unique_table_auto_inc.out  | 62 ++
 .../unique/test_unique_table_auto_inc.groovy   | 38 +
 6 files changed, 110 insertions(+), 4 deletions(-)

diff --git a/be/src/pipeline/exec/exchange_sink_operator.cpp 
b/be/src/pipeline/exec/exchange_sink_operator.cpp
index 0ccded0b825..198bc555024 100644
--- a/be/src/pipeline/exec/exchange_sink_operator.cpp
+++ b/be/src/pipeline/exec/exchange_sink_operator.cpp
@@ -193,9 +193,12 @@ Status ExchangeSinkLocalState::open(RuntimeState* state) {
 
std::make_unique(_vpartition.get(), 
find_tablet_mode);
 _tablet_sink_tuple_desc = 
_state->desc_tbl().get_tuple_descriptor(p._tablet_sink_tuple_id);
 _tablet_sink_row_desc = p._pool->add(new 
RowDescriptor(_tablet_sink_tuple_desc, false));
-//_block_convertor no need init_autoinc_info here
+// if _part_type == TPartitionType::TABLET_SINK_SHUFFLE_PARTITIONED, 
we handle the processing of auto_increment column
+// on exchange node rather than on TabletWriter
 _block_convertor =
 
std::make_unique(_tablet_sink_tuple_desc);
+_block_convertor->init_autoinc_info(_schema->db_id(), 
_schema->table_id(),
+_state->batch_size());
 _location = p._pool->add(new 
OlapTableLocationParam(p._tablet_sink_location));
 _row_distribution.init(
 {.state = _state,
diff --git a/be/src/vec/sink/vtablet_block_convertor.cpp 
b/be/src/vec/sink/vtablet_block_convertor.cpp
index 7f7f4c76008..feb6633511e 100644
--- a/be/src/vec/sink/vtablet_block_convertor.cpp
+++ b/be/src/vec/sink/vtablet_block_convertor.cpp
@@ -505,8 +505,7 @@ Status 
OlapTableBlockConvertor::_fill_auto_inc_cols(vectorized::Block* block, si
 vectorized::ColumnInt64::Container& dst_values = dst_column->get_data();
 
 vectorized::ColumnPtr src_column_ptr = block->get_by_position(idx).column;
-if (const vectorized::ColumnConst* const_column =
-check_and_get_column(src_column_ptr)) 
{
+if (const auto* const_column = 
check_and_get_column(src_column_ptr)) {
 // for insert stmt like "insert into tbl1 select null,col1,col2,... 
from tbl2" or
 // "insert into tbl1 select 1,col1,col2,... from tbl2", the type of 
literal's column
 // will be `ColumnConst`
@@ -529,7 +528,7 @@ Status 
OlapTableBlockConvertor::_fill_auto_inc_cols(vectorized::Block* block, si
 int64_t value = const_column->get_int(0);
 dst_values.resize_fill(rows, value);
 }
-} else if (const vectorized::ColumnNullable* src_nullable_column =
+} else if (const auto* src_nullable_column =

check_and_get_column(src_column_ptr)) {
 auto src_nested_column_ptr = 
src_nullable_column->get_nested_column_ptr();
 const auto& null_map_data = src_nullable_column->get_null_map_data();
diff --git a/be/src/vec/sink/writer/vtablet_writer.cpp 
b/be/src/vec/sink/writer/vtablet_writer.cpp
index 5d36ca37805..6d388a7d958 100644
--- a/be/src/vec/sink/writer/vtablet_writer.cpp
+++ b/be/src/vec/sink/writer/vtablet_writer.cpp
@@ -1197,6 +1197,8 @@ Status VTabletWriter::_init(RuntimeState* state, 
RuntimeProfile* profile) {
 }
 
 _block_convertor = 
std::make_unique(_output_tuple_desc);
+// if partition_type is TABLET_SINK_SHUFFLE_PARTITIONED, we handle the 
processing of auto_increment column
+// on exchange node rather than on TabletWriter
 _block_convertor->init_autoinc_info(
 _schema->db_id(), _schema->table_id(), _state->batch_size(),
  

(doris) branch branch-2.1 updated: [branch-2.1] Pick "[Fix](autoinc) try fix concurrent load problem with auto inc column #36421" (#37027)

2024-06-29 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new 12dddfc26c9 [branch-2.1] Pick "[Fix](autoinc) try fix concurrent load 
problem with auto inc column #36421" (#37027)
12dddfc26c9 is described below

commit 12dddfc26c9f36c1ad01051947f7e5830533a5cb
Author: bobhan1 
AuthorDate: Sun Jun 30 13:10:03 2024 +0800

[branch-2.1] Pick "[Fix](autoinc) try fix concurrent load problem with auto 
inc column #36421" (#37027)

## Proposed changes

pick https://github.com/apache/doris/pull/36421
---
 be/src/vec/sink/autoinc_buffer.cpp | 150 +
 be/src/vec/sink/autoinc_buffer.h   |  35 +++--
 .../doris/catalog/AutoIncrementGenerator.java  |   5 +-
 .../apache/doris/service/FrontendServiceImpl.java  |  10 ++
 gensrc/thrift/FrontendService.thrift   |   1 +
 .../unique/test_unique_auto_inc_concurrent.out |  10 ++
 .../unique/test_unique_auto_inc_concurrent.groovy  |  59 
 7 files changed, 203 insertions(+), 67 deletions(-)

diff --git a/be/src/vec/sink/autoinc_buffer.cpp 
b/be/src/vec/sink/autoinc_buffer.cpp
index c7c096ec6e8..f83dbcb55b8 100644
--- a/be/src/vec/sink/autoinc_buffer.cpp
+++ b/be/src/vec/sink/autoinc_buffer.cpp
@@ -19,14 +19,15 @@
 
 #include 
 
-#include 
+#include 
+#include 
 
+#include "common/logging.h"
 #include "common/status.h"
 #include "runtime/client_cache.h"
 #include "runtime/exec_env.h"
 #include "util/runtime_profile.h"
 #include "util/thrift_rpc_helper.h"
-#include "vec/sink/vtablet_block_convertor.h"
 
 namespace doris::vectorized {
 
@@ -42,54 +43,11 @@ void AutoIncIDBuffer::set_batch_size_at_least(size_t 
batch_size) {
 }
 }
 
-void AutoIncIDBuffer::_wait_for_prefetching() {
-if (_is_fetching) {
-_rpc_token->wait();
-}
-}
-
-Status AutoIncIDBuffer::sync_request_ids(size_t length,
- std::vector>* result) {
-std::unique_lock lock(_mutex);
-RETURN_IF_ERROR(_prefetch_ids(_prefetch_size()));
-if (_front_buffer.second > 0) {
-auto min_length = std::min(_front_buffer.second, length);
-length -= min_length;
-result->emplace_back(_front_buffer.first, min_length);
-_front_buffer.first += min_length;
-_front_buffer.second -= min_length;
-}
-if (length > 0) {
-_wait_for_prefetching();
-if (!_rpc_status.ok()) {
-return _rpc_status;
-}
-
-{
-std::lock_guard lock(_backend_buffer_latch);
-std::swap(_front_buffer, _backend_buffer);
-}
-
-DCHECK_LE(length, _front_buffer.second);
-if (length > _front_buffer.second) {
-return Status::RpcError("auto inc sync result length > front 
buffer. " +
-std::to_string(length) + " vs " +
-std::to_string(_front_buffer.second));
-}
-result->emplace_back(_front_buffer.first, length);
-_front_buffer.first += length;
-_front_buffer.second -= length;
-}
-return Status::OK();
-}
-
-Status AutoIncIDBuffer::_prefetch_ids(size_t length) {
-if (_front_buffer.second > _low_water_level_mark() || _is_fetching) {
-return Status::OK();
-}
+Result AutoIncIDBuffer::_fetch_ids_from_fe(size_t length) {
+constexpr uint32_t FETCH_AUTOINC_MAX_RETRY_TIMES = 3;
+_rpc_status = Status::OK();
 TNetworkAddress master_addr = 
ExecEnv::GetInstance()->master_info()->network_address;
-_is_fetching = true;
-RETURN_IF_ERROR(_rpc_token->submit_func([=, this]() {
+for (uint32_t retry_times = 0; retry_times < 
FETCH_AUTOINC_MAX_RETRY_TIMES; retry_times++) {
 TAutoIncrementRangeRequest request;
 TAutoIncrementRangeResult result;
 request.__set_db_id(_db_id);
@@ -97,7 +55,7 @@ Status AutoIncIDBuffer::_prefetch_ids(size_t length) {
 request.__set_column_id(_column_id);
 request.__set_length(length);
 
-int64_t get_auto_inc_range_rpc_ns;
+int64_t get_auto_inc_range_rpc_ns = 0;
 {
 SCOPED_RAW_TIMER(&get_auto_inc_range_rpc_ns);
 _rpc_status = ThriftRpcHelper::rpc(
@@ -109,15 +67,95 @@ Status AutoIncIDBuffer::_prefetch_ids(size_t length) {
 LOG(INFO) << "[auto-inc-range][start=" << result.start << ",length=" 
<< result.length
   << "][elapsed=" << get_auto_inc_range_rpc_ns / 100 << " 
ms]";
 
-if (!_rpc_status.ok() || result.length <= 0) {
-LOG(WARNING) << "Failed to fetch auto-incremnt range, encounter 
rpc failure."
- << "errmsg=" << _rpc_status.to_string();
-return;
+if (_rpc_status.is()) {
+LOG_WARNING(
+"Failed to fetch auto-incremn