(doris) branch master updated: [bugfix](hive)Prevent multiple `fs` from being generated (#36954)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new d33e1a0df63 [bugfix](hive)Prevent multiple `fs` from being generated (#36954)
d33e1a0df63 is described below

commit d33e1a0df6375a271603cdc347f652dc2ec2752f
Author: wuwenchi 
AuthorDate: Tue Jul 2 14:59:57 2024 +0800

[bugfix](hive)Prevent multiple `fs` from being generated (#36954)

If the user has already logged in, there is no need to log in again;
otherwise multiple `fs` instances will be generated.
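As a standalone illustration, the reuse pattern the diff below introduces looks
roughly like this (a minimal sketch assuming Hadoop's UserGroupInformation API;
the wrapper class and method names are illustrative, not the actual HadoopUGI code):

    import java.io.IOException;
    import org.apache.hadoop.security.UserGroupInformation;

    public final class UgiReuseSketch {
        public static UserGroupInformation loginSimple(String hadoopUserName) {
            try {
                UserGroupInformation current = UserGroupInformation.getLoginUser();
                if (current.getUserName().equals(hadoopUserName)) {
                    // Already logged in as this user: reuse the cached UGI so that
                    // later FileSystem lookups hit the same cache entry instead of
                    // producing a new `fs` instance per login.
                    return current;
                }
            } catch (IOException e) {
                // No login user cached yet; fall through and log in immediately.
            }
            UserGroupInformation ugi = UserGroupInformation.createRemoteUser(hadoopUserName);
            UserGroupInformation.setLoginUser(ugi);
            return ugi;
        }
    }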
---
 .../apache/doris/common/security/authentication/HadoopUGI.java | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java b/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java
index 5e61664c8fa..1a86b9e327a 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java
@@ -76,6 +76,16 @@ public class HadoopUGI {
 ((SimpleAuthenticationConfig) config).setUsername(hadoopUserName);
 LOG.debug(AuthenticationConfig.HADOOP_USER_NAME + " is unset, use default user: hadoop");
 }
+
+try {
+ugi = UserGroupInformation.getLoginUser();
+if (ugi.getUserName().equals(hadoopUserName)) {
+return ugi;
+}
+} catch (IOException e) {
+LOG.warn("A SecurityException occurs with simple, do login immediately.", e);
+}
+
 ugi = UserGroupInformation.createRemoteUser(hadoopUserName);
 UserGroupInformation.setLoginUser(ugi);
 LOG.debug("Login by proxy user, hadoop.username: {}", hadoopUserName);





(doris) branch master updated: [Improvement](inverted index) Remove the check for inverted index file exists (#36945)

2024-07-02 Thread kxiao
This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new a25ac144323 [Improvement](inverted index) Remove the check for inverted index file exists (#36945)
a25ac144323 is described below

commit a25ac144323ddc77f32766dee9a76c6bd070898c
Author: Sun Chenyang 
AuthorDate: Tue Jul 2 15:10:08 2024 +0800

[Improvement](inverted index) Remove the check for inverted index file exists (#36945)

Remove the check for whether the inverted index file exists, to avoid the
latency of remote file systems, e.g. S3.
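The gist of the change, as a minimal Java sketch with java.nio standing in for
the BE's `fs` abstraction: issue a single metadata request (file size) and treat
NOT_FOUND as an error path, instead of paying a separate exists() round trip
first, which is expensive on remote storage such as S3:

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.NoSuchFileException;
    import java.nio.file.Path;

    public final class SingleCallSizeSketch {
        public static long sizeOrThrow(Path indexFile) throws IOException {
            try {
                // One metadata call; on a remote fs every call is a network round trip.
                return Files.size(indexFile);
            } catch (NoSuchFileException e) {
                // NOT_FOUND surfaces here, so no separate exists() check is needed.
                throw new IOException("inverted index file " + indexFile + " is not found", e);
            }
        }
    }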
---
 be/src/clucene |  2 +-
 .../segment_v2/inverted_index_file_reader.cpp  | 20 +---
 .../segment_v2/inverted_index_fs_directory.cpp | 49 --
 .../rowset/segment_v2/inverted_index_reader.cpp| 15 +-
 .../olap/rowset/segment_v2/inverted_index_reader.h |  2 -
 .../test_index_not_found_fault_injection.out   | 13 +
 .../test_index_not_found_fault_injection.groovy| 59 ++
 7 files changed, 108 insertions(+), 52 deletions(-)

diff --git a/be/src/clucene b/be/src/clucene
index dd200e10e72..5db9db68e44 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit dd200e10e72120445bd897f3dcc515702f4dc80b
+Subproject commit 5db9db68e448b8ccfd360d02666bbac44e6f8d1a
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
index 7a744ea939e..dbd86bb93a5 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
@@ -23,6 +23,7 @@
 #include "olap/rowset/segment_v2/inverted_index_compound_reader.h"
 #include "olap/rowset/segment_v2/inverted_index_fs_directory.h"
 #include "olap/tablet_schema.h"
+#include "util/debug_points.h"
 
 namespace doris::segment_v2 {
 
@@ -41,14 +42,17 @@ Status InvertedIndexFileReader::_init_from_v2(int32_t read_buffer_size) {
 
 std::unique_lock lock(_mutex); // Lock for writing
 try {
-bool exists = false;
-RETURN_IF_ERROR(_fs->exists(index_file_full_path, &exists));
-if (!exists) {
+int64_t file_size = 0;
+Status st = _fs->file_size(index_file_full_path, &file_size);
+DBUG_EXECUTE_IF("inverted file read error: index file not found", {
+st = Status::Error("index file not 
found");
+})
+if (st.code() == ErrorCode::NOT_FOUND) {
 return Status::Error(
 "inverted index file {} is not found", 
index_file_full_path);
+} else if (!st.ok()) {
+return st;
 }
-int64_t file_size = 0;
-RETURN_IF_ERROR(_fs->file_size(index_file_full_path, &file_size));
 if (file_size == 0) {
 LOG(WARNING) << "inverted index file " << index_file_full_path << " is empty.";
 return Status::Error(
@@ -157,6 +161,10 @@ Result> InvertedIndexFileReader::_open(
 dir->close();
 _CLDELETE(dir)
 }
+if (err.number() == CL_ERR_FileNotFound) {
+return ResultError(Status::Error(
+"inverted index path: {} not exist.", index_file_path));
+}
 return ResultError(Status::Error(
 "CLuceneError occur when open idx file {}, error msg: {}", index_file_path,
 err.what()));
@@ -174,7 +182,7 @@ Result> InvertedIndexFileReader::_open(
 if (index_it == _indices_entries.end()) {
 std::ostringstream errMsg;
 errMsg << "No index with id " << index_id << " found";
-return ResultError(Status::Error(
+return ResultError(Status::Error(
 "CLuceneError occur when open idx file {}, error msg: {}",
 InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix),
 errMsg.str()));
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
index 499a3e41d9a..54d484d1199 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
@@ -112,30 +112,28 @@ bool DorisFSDirectory::FSIndexInput::open(const io::FileSystemSPtr& fs, const ch
 reader_options.cache_type = config::enable_file_cache ? io::FileCachePolicy::FILE_BLOCK_CACHE : io::FileCachePolicy::NO_CACHE;
 reader_options.is_doris_table = true;
-if (!fs->open_file(path, &h->_reader, &reader_options).ok()) {
-error.set(CL_ERR_IO, "open file error");
+Status st = fs->open_file(path, &h->_reader, &reader_options);
+DBUG_EXECUTE_IF("inverted file read 

(doris) branch master updated: [update](session-var) set ENABLE_COMMON_EXPR_PUSHDOWN_FOR_INVERTED_INDEX default to true (#35387)

2024-07-02 Thread kxiao
This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new 74d88d5f76e [update](session-var) set ENABLE_COMMON_EXPR_PUSHDOWN_FOR_INVERTED_INDEX default to true (#35387)
74d88d5f76e is described below

commit 74d88d5f76eb8c04f3425788992ba1ff79a1211c
Author: amory 
AuthorDate: Tue Jul 2 15:20:05 2024 +0800

[update](session-var) set ENABLE_COMMON_EXPR_PUSHDOWN_FOR_INVERTED_INDEX default to true (#35387)

Set the session variable ENABLE_COMMON_EXPR_PUSHDOWN_FOR_INVERTED_INDEX's
default value to true, and limit the pushdown to the array_contains function.
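From a client's perspective the change looks like this (a sketch over JDBC;
connection details and table/column names are placeholders, and the SQL variable
name is assumed to follow the constant ENABLE_COMMON_EXPR_PUSHDOWN_FOR_INVERTED_INDEX):

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public final class InvertedIndexPushdownExample {
        public static void main(String[] args) throws Exception {
            try (Connection conn = DriverManager.getConnection(
                         "jdbc:mysql://127.0.0.1:9030/demo", "root", "");
                 Statement stmt = conn.createStatement()) {
                // Now the default after this change; shown explicitly for clarity.
                stmt.execute("SET enable_common_expr_pushdown_for_inverted_index = true");
                // Per this change, only array_contains is evaluated through the inverted index.
                try (ResultSet rs = stmt.executeQuery(
                        "SELECT count(*) FROM tbl WHERE array_contains(student_ids, 3)")) {
                    while (rs.next()) {
                        System.out.println(rs.getLong(1));
                    }
                }
            }
        }
    }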
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp| 7 +++
 fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java | 7 ---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index a8a9758afcd..82a3c8ba509 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -90,6 +90,7 @@
 #include "vec/exprs/vexpr_context.h"
 #include "vec/exprs/vliteral.h"
 #include "vec/exprs/vslot_ref.h"
+#include "vec/functions/array/function_array_index.h"
 #include "vec/json/path_in_data.h"
 
 namespace doris {
@@ -1342,6 +1343,12 @@ Status SegmentIterator::_apply_inverted_index() {
 // _inverted_index_iterators has all column ids which has inverted index
 // _common_expr_columns has all column ids from _common_expr_ctxs_push_down
 // if current bitmap is already empty just return
+if (!(expr_ctx->root()->node_type() == TExprNodeType::FUNCTION_CALL &&
+  expr_ctx->root()->fn().name.function_name ==
+  vectorized::ArrayContainsAction::name)) {
+// now we only support ArrayContains function to evaluate inverted index
+continue;
+}
 if (_row_bitmap.isEmpty()) {
 break;
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 7aa78c2bbb2..59b5d89ac2b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -1471,9 +1471,9 @@ public class SessionVariable implements Serializable, Writable {
 public boolean enableInvertedIndexQuery = true;
 
 // Whether enable query expr with inverted index.
-@VariableMgr.VarAttr(name = ENABLE_COMMON_EXPR_PUSHDOWN_FOR_INVERTED_INDEX, needForward = true, description = {
-"是否启用表达式上使用 inverted index。", "Set whether to use inverted index query for expr."})
-public boolean enableCommonExpPushDownForInvertedIndex = false;
+@VariableMgr.VarAttr(name = ENABLE_COMMON_EXPR_PUSHDOWN_FOR_INVERTED_INDEX, fuzzy = true, needForward = true,
+description = {"是否启用表达式上使用 inverted index。", "Set whether to use inverted index query for expr."})
+public boolean enableCommonExpPushDownForInvertedIndex = true;
 
 // Whether enable pushdown count agg to scan node when using inverted index match.
 @VariableMgr.VarAttr(name = ENABLE_PUSHDOWN_COUNT_ON_INDEX, needForward = true, description = {
@@ -2008,6 +2008,7 @@ public class SessionVariable implements Serializable, Writable {
 this.enableLocalExchange = random.nextBoolean();
 // This will cause be dead loop, disable it first
 // this.disableJoinReorder = random.nextBoolean();
+this.enableCommonExpPushDownForInvertedIndex = random.nextBoolean();
 this.disableStreamPreaggregations = random.nextBoolean();
 this.partitionedHashJoinRowsThreshold = random.nextBoolean() ? 8 : 1048576;
 this.partitionedHashAggRowsThreshold = random.nextBoolean() ? 8 : 1048576;





(doris) branch branch-2.1 updated (859a7c80b8b -> 7443e8fcf2a)

2024-07-02 Thread kxiao
This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a change to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


from 859a7c80b8b [fix](parquet) prevent parquet page reader print much warning logs (#37012)
 add 7443e8fcf2a [cherry-pick](branch-2.1) fix single compaction test p2 #34568 #36881 (#37075)

No new revisions were added by this update.

Summary of changes:
 be/src/olap/olap_server.cpp|   1 +
 be/src/olap/single_replica_compaction.cpp  |   9 +
 be/src/olap/tablet.cpp |   2 +
 be/src/util/doris_metrics.cpp  |   6 +
 be/src/util/doris_metrics.h|   3 +
 .../apache/doris/service/FrontendServiceImpl.java  |   5 +
 ... => test_single_compaction_fault_injection.out} |   0
 .../test_single_compaction_fault_injection.groovy  | 375 +
 .../test_single_replica_compaction.groovy  |  72 ++--
 9 files changed, 442 insertions(+), 31 deletions(-)
 copy regression-test/data/compaction/{test_single_replica_compaction.out => test_single_compaction_fault_injection.out} (100%)
 create mode 100644 regression-test/suites/compaction/test_single_compaction_fault_injection.groovy





(doris) branch branch-2.1 updated: [fix](multi-catalog) Revert #36575 and check nullptr of data column (#37086)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new d0eea3886d3 [fix](multi-catalog) Revert #36575 and check nullptr of data column (#37086)
d0eea3886d3 is described below

commit d0eea3886d3d1952ce98d8a5aa72799de7be3742
Author: Ashin Gau 
AuthorDate: Tue Jul 2 15:32:52 2024 +0800

[fix](multi-catalog) Revert #36575 and check nullptr of data column (#37086)

Revert #36575, because `VScanner::get_block` will check
`DCHECK(block->rows() == 0)`, so the block should be cleared when `eof = true`.
---
 be/src/vec/core/block.cpp  | 6 --
 be/src/vec/exec/format/orc/vorc_reader.cpp | 2 ++
 be/src/vec/exec/format/parquet/vparquet_reader.cpp | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index 39f952c837a..dd1a659ae15 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -704,8 +704,10 @@ void Block::clear_column_data(int column_size) noexcept {
 }
 }
 for (auto& d : data) {
-DCHECK_EQ(d.column->use_count(), 1) << " " << print_use_count();
-(*std::move(d.column)).assume_mutable()->clear();
+if (d.column) {
+DCHECK_EQ(d.column->use_count(), 1) << " " << print_use_count();
+(*std::move(d.column)).assume_mutable()->clear();
+}
 }
 row_same_bit.clear();
 }
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 6e4f5674e29..16909f0023a 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -1594,6 +1594,7 @@ Status OrcReader::get_next_block_impl(Block* block, size_t* read_rows, bool* eof
 } catch (std::exception& e) {
 std::string _err_msg = e.what();
 if (_io_ctx && _io_ctx->should_stop && _err_msg == "stop") {
+block->clear_column_data();
 *eof = true;
 *read_rows = 0;
 return Status::OK();
@@ -1663,6 +1664,7 @@ Status OrcReader::get_next_block_impl(Block* block, size_t* read_rows, bool* eof
 } catch (std::exception& e) {
 std::string _err_msg = e.what();
 if (_io_ctx && _io_ctx->should_stop && _err_msg == "stop") {
+block->clear_column_data();
 *eof = true;
 *read_rows = 0;
 return Status::OK();
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index db3399eb0dd..f99786dc6e2 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -525,6 +525,7 @@ Status ParquetReader::get_next_block(Block* block, size_t* read_rows, bool* eof)
 Status batch_st =
 _current_group_reader->next_batch(block, _batch_size, read_rows, &_row_group_eof);
 if (batch_st.is()) {
+block->clear_column_data();
 _current_group_reader.reset(nullptr);
 *read_rows = 0;
 *eof = true;





(doris) branch branch-2.0 updated: [fix](array) fix array_except/union for left const return only one row result #36776 (#36934)

2024-07-02 Thread kxiao
This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
 new 0c9d20c17cf [fix](array) fix array_except/union for left const return only one row result #36776 (#36934)
0c9d20c17cf is described below

commit 0c9d20c17cf79a5683fde41e636d9bac50a7fe37
Author: amory 
AuthorDate: Tue Jul 2 15:36:25 2024 +0800

[fix](array) fix array_except/union for left const return only one row result #36776 (#36934)
---
 be/src/vec/functions/array/function_array_set.h  |  7 ++-
 .../sql_functions/array_functions/test_array_functions.out   | 12 
 .../array_functions/test_array_functions.groovy  |  3 +++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/functions/array/function_array_set.h b/be/src/vec/functions/array/function_array_set.h
index 41bf53f921f..e28835d38ed 100644
--- a/be/src/vec/functions/array/function_array_set.h
+++ b/be/src/vec/functions/array/function_array_set.h
@@ -181,7 +181,12 @@ private:
 constexpr auto execute_left_column_first = Impl::Action::execute_left_column_first;
 size_t current = 0;
 Impl impl;
-for (size_t row = 0; row < left_data.offsets_ptr->size(); ++row) {
+size_t row_size = left_data.offsets_ptr->size();
+if constexpr (LCONST) {
+// if left param is const column, we should use right data offset to get the true row_size
+row_size = right_data.offsets_ptr->size();
+}
+for (size_t row = 0; row < row_size; ++row) {
 size_t count = 0;
 size_t left_off = (*left_data.offsets_ptr)[index_check_const(row, LCONST) - 1];
 size_t left_len = (*left_data.offsets_ptr)[index_check_const(row, LCONST)] - left_off;
diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
index 1b60f427058..664ea154982 100644
--- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
+++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions.out
@@ -1659,14 +1659,26 @@
 10005  [10005, null, null] [null, 3, 10005, 2, 1]
10006  [60002, 60002, 60003, null, 60005]  [null, 3, 60002, 60005, 60003, 2, 1]
 
+-- !select_union_left_const --
+10005  [10005, null, null] [null, 3, 10005, 2, 1]
+10006 [60002, 60002, 60003, null, 60005]  [null, 3, 60002, 60005, 60003, 2, 1]
+
 -- !select_except --
 10005  [10005, null, null] [10005, null]
 10006  [60002, 60002, 60003, null, 60005]  [60002, 60003, null, 60005]
 
+-- !select_except_left_const --
+10005  [10005, null, null] [1, 2, 3]
+10006  [60002, 60002, 60003, null, 60005]  [1, 2, 3]
+
 -- !select_intersect --
 10005  [10005, null, null] [null]
 10006  [60002, 60002, 60003, null, 60005]  [null]
 
+-- !select_intersect_left_const --
+10005  [10005, null, null] [null]
+10006  [60002, 60002, 60003, null, 60005]  [null]
+
 -- !select_array_datetimev2_1 --
 1  ["2023-01-19 18:11:11.111", "2023-01-19 18:22:22.222", "2023-01-19 
18:33:33.333"]   ["2023-01-19 18:22:22.222", "2023-01-19 18:33:33.333", 
"2023-01-19 18:44:44.444"]   ["2023-01-19 18:11:11.11", "2023-01-19 
18:22:22.22", "2023-01-19 18:33:33.33"]
 
diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
index c043ada07af..f278483e707 100644
--- a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
+++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions.groovy
@@ -273,8 +273,11 @@ suite("test_array_functions") {
 sql """ insert into ${tableName3} values 
(10006,'b',[60002,60002,60003,null,60005]) """
 
 qt_select_union "select class_id, student_ids, array_union(student_ids,[1,2,3]) from ${tableName3} order by class_id;"
+qt_select_union_left_const "select class_id, student_ids, array_union([1,2,3], student_ids,[1,2,3]) from ${tableName3} order by class_id;"
 qt_select_except "select class_id, student_ids, array_except(student_ids,[1,2,3]) from ${tableName3} order by class_id;"
+qt_select_except_left_const "select class_id, student_ids, array_except([1,2,3], student_ids) from ${tableName3} order by class_id;"
 qt_select_intersect "select class_id, student_ids, array_intersect(student_ids,[1,2,3,null]) from ${tableName3} order by class_id;"
+qt_select_intersect_left_const "select class_id, student_ids, array_intersect([1,2,3,null], student_ids) from ${tableName3} order by class_id;"
 
 def tableName4 = "tbl_test_array_datetimev2_funct

(doris) branch master updated: Mv external p2 test case to p0. (#37070)

2024-07-02 Thread lijibing
This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new 3716fb536c8 Mv external p2 test case to p0. (#37070)
3716fb536c8 is described below

commit 3716fb536c8ca3f9181d8149ef22eaee39c86054
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Tue Jul 2 15:47:17 2024 +0800

Mv external p2 test case to p0. (#37070)

Mv external p2 test case to p0.
---
 .../orc_partitioned_columns/create_table.hql   |  20 
 .../orc_partitioned_columns/data.tar.gz| Bin 0 -> 722 bytes
 .../multi_catalog/orc_partitioned_columns/run.sh   |  12 +++
 .../orc_partitioned_one_column/create_table.hql|  20 
 .../orc_partitioned_one_column/data.tar.gz | Bin 0 -> 1064 bytes
 .../orc_partitioned_one_column/run.sh  |  12 +++
 .../parquet_partitioned_columns/create_table.hql   |  20 
 .../parquet_partitioned_columns/data.tar.gz| Bin 0 -> 639 bytes
 .../parquet_partitioned_columns/run.sh |  12 +++
 .../create_table.hql   |  20 
 .../parquet_partitioned_one_column/data.tar.gz | Bin 0 -> 729 bytes
 .../parquet_partitioned_one_column/run.sh  |  12 +++
 .../test_mixed_par_locations_orc/create_table.hql  |  22 
 .../test_mixed_par_locations_orc/data.tar.gz   | Bin 0 -> 1325 bytes
 .../test_mixed_par_locations_orc/run.sh|  12 +++
 .../create_table.hql   |  22 
 .../test_mixed_par_locations_parquet/data.tar.gz   | Bin 0 -> 952 bytes
 .../test_mixed_par_locations_parquet/run.sh|  12 +++
 .../create_table.hql   |  18 
 .../data.tar.gz| Bin 0 -> 781 bytes
 .../run.sh |  12 +++
 .../create_table.hql   |  18 
 .../data.tar.gz| Bin 0 -> 611 bytes
 .../run.sh |  12 +++
 .../create_table.hql   |  18 
 .../data.tar.gz| Bin 0 -> 316 bytes
 .../run.sh |  12 +++
 .../text_partitioned_columns/create_table.hql  |  21 
 .../text_partitioned_columns/data.tar.gz   | Bin 0 -> 410 bytes
 .../multi_catalog/text_partitioned_columns/run.sh  |  12 +++
 .../text_partitioned_one_column/create_table.hql   |  21 
 .../text_partitioned_one_column/data.tar.gz| Bin 0 -> 321 bytes
 .../text_partitioned_one_column/run.sh |  12 +++
 .../hive/test_external_catalog_hive_partition.out  | 118 -
 .../hive/test_mixed_par_locations.out  |  36 +++
 .../hive/test_truncate_char_or_varchar_columns.out |  84 +++
 .../test_external_catalog_hive_partition.groovy|  20 ++--
 .../hive/test_hive_statistic_auto.groovy   |  16 +--
 .../hive/test_hive_statistic_clean.groovy  |  24 +++--
 .../hive/test_mixed_par_locations.groovy   |  57 ++
 .../test_truncate_char_or_varchar_columns.groovy   |  16 +--
 .../hive/test_mixed_par_locations.groovy   |  62 ---
 42 files changed, 646 insertions(+), 139 deletions(-)

diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_partitioned_columns/create_table.hql b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_partitioned_columns/create_table.hql
new file mode 100644
index 000..3cc9ce67032
--- /dev/null
+++ b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_partitioned_columns/create_table.hql
@@ -0,0 +1,20 @@
+CREATE DATABASE IF NOT EXISTS multi_catalog;
+USE multi_catalog;
+
+CREATE TABLE `orc_partitioned_columns`(
+  `t_timestamp` timestamp)
+PARTITIONED BY (
+ `t_int` int,
+ `t_float` float,
+ `t_string` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+WITH SERDEPROPERTIES (
+  'serialization.format' = '1')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+LOCATION '/user/doris/suites/multi_catalog/orc_partitioned_columns';
+
+msck repair table orc_partitioned_columns;
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_partitioned_columns/data.tar.gz b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_partitioned_columns/data.tar.gz
new file mode 100644
index 000..ea87f4489b0
Binary files /dev/null and b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_partitioned_columns/data.tar.gz differ
diff --git a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/orc_partitioned_columns/run.sh b/docker/thirdpa

(doris) branch master updated (3716fb536c8 -> d60dbb507e6)

2024-07-02 Thread eldenmoon
This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


from 3716fb536c8 Mv external p2 test case to p0. (#37070)
 add d60dbb507e6 [feature](json)support explode_json_object func (#36887)

No new revisions were added by this update.

Summary of changes:
 .../table_function/table_function_factory.cpp  |   2 +
 .../{vexplode_map.cpp => vexplode_json_object.cpp} | 115 +++--
 .../{vexplode_map.h => vexplode_json_object.h} |  30 ++
 be/src/vec/functions/function_fake.cpp |  14 +++
 .../catalog/BuiltinTableGeneratingFunctions.java   |   4 +
 .../ExplodeJsonObject.java}|  28 ++---
 .../ExplodeJsonObjectOuter.java}   |  26 +++--
 .../visitor/TableGeneratingFunctionVisitor.java|  10 ++
 .../jsonb_p0/test_jsonb_load_and_function.out  |  76 ++
 .../jsonb_p0/test_jsonb_load_and_function.groovy   |   9 ++
 .../jsonb_p0/test_jsonb_load_and_function.groovy   |   5 +
 11 files changed, 214 insertions(+), 105 deletions(-)
 copy be/src/vec/exprs/table_function/{vexplode_map.cpp => vexplode_json_object.cpp} (51%)
 copy be/src/vec/exprs/table_function/{vexplode_map.h => vexplode_json_object.h} (71%)
 copy fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/{scalar/StPolyfromtext.java => generator/ExplodeJsonObject.java} (68%)
 copy fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/{scalar/StPolyfromtext.java => generator/ExplodeJsonObjectOuter.java} (68%)





(doris) branch master updated: [opt](ctas) add a variable to control varchar length in ctas (#37069)

2024-07-02 Thread morrysnow
This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new 2e63fefabd4 [opt](ctas) add a variable to control varchar length in ctas (#37069)
2e63fefabd4 is described below

commit 2e63fefabd4a4a732cfac10187c274d3d3a42b12
Author: morrySnow <101034200+morrys...@users.noreply.github.com>
AuthorDate: Tue Jul 2 16:20:01 2024 +0800

[opt](ctas) add a variable to control varchar length in ctas (#37069)

Add a new session variable: use_max_length_of_varchar_in_ctas.

In CTAS (Create Table As Select), if CHAR/VARCHAR columns do not originate
from the source table, this controls whether to set the length of such a
column to MAX, which is 65533. The default is true.
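A sketch of the effect over JDBC (connection details, table names, and the
replication property are placeholders):

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.Statement;

    public final class CtasVarcharLengthExample {
        public static void main(String[] args) throws Exception {
            try (Connection conn = DriverManager.getConnection(
                         "jdbc:mysql://127.0.0.1:9030/demo", "root", "");
                 Statement stmt = conn.createStatement()) {
                // Default is true: derived CHAR/VARCHAR columns become VARCHAR(65533).
                stmt.execute("SET use_max_length_of_varchar_in_ctas = false");
                // With the variable off, a derived column keeps its computed length,
                // e.g. VARCHAR(30) for substring(col, 1, 30) as in the test below.
                stmt.execute("CREATE TABLE t_ctas PROPERTIES('replication_num'='1') "
                        + "AS SELECT substring(col, 1, 30) FROM src");
            }
        }
    }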
---
 .../nereids/trees/plans/commands/CreateTableCommand.java | 10 ++
 .../src/main/java/org/apache/doris/qe/SessionVariable.java   |  9 +
 regression-test/data/nereids_p0/create_table/test_ctas.out   |  3 +++
 .../suites/nereids_p0/create_table/test_ctas.groovy  | 12 
 4 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateTableCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateTableCommand.java
index 7df93d8b9a9..f2dd92fe328 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateTableCommand.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateTableCommand.java
@@ -143,10 +143,12 @@ public class CreateTableCommand extends Command implements ForwardWithSync {
 }
 }
 } else {
-dataType = TypeCoercionUtils.replaceSpecifiedType(dataType,
-VarcharType.class, VarcharType.MAX_VARCHAR_TYPE);
-dataType = TypeCoercionUtils.replaceSpecifiedType(dataType,
-CharType.class, VarcharType.MAX_VARCHAR_TYPE);
+if (ctx.getSessionVariable().useMaxLengthOfVarcharInCtas) {
+dataType = TypeCoercionUtils.replaceSpecifiedType(dataType,
+VarcharType.class, VarcharType.MAX_VARCHAR_TYPE);
+dataType = TypeCoercionUtils.replaceSpecifiedType(dataType,
+CharType.class, VarcharType.MAX_VARCHAR_TYPE);
+}
 }
 }
 // if the column is an expression, we set it to nullable, otherwise according to the nullable of the slot.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 59b5d89ac2b..29d3fbf985b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -603,6 +603,8 @@ public class SessionVariable implements Serializable, Writable {
 
 public static final String MAX_COLUMN_READER_NUM = "max_column_reader_num";
 
+public static final String USE_MAX_LENGTH_OF_VARCHAR_IN_CTAS = "use_max_length_of_varchar_in_ctas";
+
 public static final List DEBUG_VARIABLES = ImmutableList.of(
 SKIP_DELETE_PREDICATE,
 SKIP_DELETE_BITMAP,
@@ -1985,6 +1987,13 @@ public class SessionVariable implements Serializable, Writable {
 checker = "checkExternalAggPartitionBits", fuzzy = true)
 public int externalAggPartitionBits = 5; // means that the hash table will be partitioned into 32 blocks.
 
+@VariableMgr.VarAttr(name = USE_MAX_LENGTH_OF_VARCHAR_IN_CTAS, description = {
+"在CTAS中,如果 CHAR / VARCHAR 列不来自于源表,是否是将这一列的长度设置为 MAX,即65533。默认为 true。",
+"In CTAS (Create Table As Select), if CHAR/VARCHAR columns do not originate from the source table,"
++ " whether to set the length of such a column to MAX, which is 65533. The default is true."
+})
+public boolean useMaxLengthOfVarcharInCtas = true;
+
 public boolean isEnableJoinSpill() {
 return enableJoinSpill;
 }
diff --git a/regression-test/data/nereids_p0/create_table/test_ctas.out b/regression-test/data/nereids_p0/create_table/test_ctas.out
index 447d405ad31..976a2ead90b 100644
--- a/regression-test/data/nereids_p0/create_table/test_ctas.out
+++ b/regression-test/data/nereids_p0/create_table/test_ctas.out
@@ -21,3 +21,6 @@ r2{"title":"Amount","value":2.1}
 2.12.2 2.3 2.402.502.60
 2.12.2 2.3 2.402.502.60
 
+-- !desc --
+__substring_0  VARCHAR(30) Yes true\N  
+
diff --git a/regression-test/suites/nereids_p0/create_table/test_ctas.groovy b/regression-test/suites

(doris-kafka-connector) branch master updated: [Fix]Fix the E2E test stream load data is not sink to doris (#37)

2024-07-02 Thread diwu
This is an automated email from the ASF dual-hosted git repository.

diwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-kafka-connector.git


The following commit(s) were added to refs/heads/master by this push:
 new ec4ce3d  [Fix]Fix the E2E test stream load data is not sink to doris (#37)
ec4ce3d is described below

commit ec4ce3d04ee2cf0aaf93f1ff9af1ef0aa659d0b2
Author: wudongliang <46414265+donglian...@users.noreply.github.com>
AuthorDate: Tue Jul 2 16:26:11 2024 +0800

[Fix]Fix the E2E test stream load data is not sink to doris (#37)
---
 .../e2e/kafka/KafkaContainerServiceImpl.java   |  2 +-
 .../e2e/sink/stringconverter/StringMsgE2ETest.java | 23 --
 .../e2e/string_converter/string_msg_connector.json |  2 +-
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/src/test/java/org/apache/doris/kafka/connector/e2e/kafka/KafkaContainerServiceImpl.java b/src/test/java/org/apache/doris/kafka/connector/e2e/kafka/KafkaContainerServiceImpl.java
index 083cdb2..4e38ab3 100644
--- a/src/test/java/org/apache/doris/kafka/connector/e2e/kafka/KafkaContainerServiceImpl.java
+++ b/src/test/java/org/apache/doris/kafka/connector/e2e/kafka/KafkaContainerServiceImpl.java
@@ -214,7 +214,7 @@ public class KafkaContainerServiceImpl implements KafkaContainerService {
 
 // The current thread sleeps for 10 seconds so that connect can consume messages to doris in
 // time.
-Thread.sleep(1);
+Thread.sleep(6);
 }
 
 @Override
diff --git a/src/test/java/org/apache/doris/kafka/connector/e2e/sink/stringconverter/StringMsgE2ETest.java b/src/test/java/org/apache/doris/kafka/connector/e2e/sink/stringconverter/StringMsgE2ETest.java
index 9ab8891..cd3f455 100644
--- a/src/test/java/org/apache/doris/kafka/connector/e2e/sink/stringconverter/StringMsgE2ETest.java
+++ b/src/test/java/org/apache/doris/kafka/connector/e2e/sink/stringconverter/StringMsgE2ETest.java
@@ -33,8 +33,11 @@ import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class StringMsgE2ETest extends AbstractStringE2ESinkTest {
+private static final Logger LOG = LoggerFactory.getLogger(StringMsgE2ETest.class);
 private static String connectorName;
 private static String jsonMsgConnectorContent;
 private static DorisOptions dorisOptions;
@@ -80,12 +83,20 @@ public class StringMsgE2ETest extends AbstractStringE2ESinkTest {
 
 String table = dorisOptions.getTopicMapTable(topic);
 Statement statement = getJdbcConnection().createStatement();
-ResultSet resultSet = statement.executeQuery("select * from " + database + "." + table);
-if (resultSet.next()) {
-Assert.assertEquals(1, resultSet.getString("id"));
-Assert.assertEquals("zhangsan", resultSet.getString("name"));
-Assert.assertEquals(12, resultSet.getString("12"));
-}
+String querySql = "select * from " + database + "." + table;
+LOG.info("start to query result from doris. sql={}", querySql);
+ResultSet resultSet = statement.executeQuery(querySql);
+
+Assert.assertTrue(resultSet.next());
+
+int id = resultSet.getInt("id");
+String name = resultSet.getString("name");
+int age = resultSet.getInt("age");
+LOG.info("Query result is id={}, name={}, age={}", id, name, age);
+
+Assert.assertEquals(1, id);
+Assert.assertEquals("zhangsan", name);
+Assert.assertEquals(12, age);
 }
 
 @AfterClass
diff --git a/src/test/resources/e2e/string_converter/string_msg_connector.json b/src/test/resources/e2e/string_converter/string_msg_connector.json
index 77340ea..dd994cc 100644
--- a/src/test/resources/e2e/string_converter/string_msg_connector.json
+++ b/src/test/resources/e2e/string_converter/string_msg_connector.json
@@ -5,7 +5,7 @@
 "topics":"string_test",
 "tasks.max":"1",
 "doris.topic2table.map": "string_test:string_msg_tab",
-"buffer.count.records":"10",
+"buffer.count.records":"1",
 "buffer.flush.time":"120",
 "buffer.size.bytes":"1000",
 "doris.urls":"127.0.0.1",





(doris-kafka-connector) branch master updated: [Improve]Improve the unit test case of DorisWriter (#34)

2024-07-02 Thread diwu
This is an automated email from the ASF dual-hosted git repository.

diwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-kafka-connector.git


The following commit(s) were added to refs/heads/master by this push:
 new 7bf7c17  [Improve]Improve the unit test case of DorisWriter (#34)
7bf7c17 is described below

commit 7bf7c171a5add364622f292e70e2119fb894d686
Author: wudongliang <46414265+donglian...@users.noreply.github.com>
AuthorDate: Tue Jul 2 16:26:38 2024 +0800

[Improve]Improve the unit test case of DorisWriter (#34)
---
 .../kafka/connector/writer/TestCopyIntoWriter.java | 22 +-
 .../connector/writer/TestStreamLoadWriter.java | 21 -
 2 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/src/test/java/org/apache/doris/kafka/connector/writer/TestCopyIntoWriter.java b/src/test/java/org/apache/doris/kafka/connector/writer/TestCopyIntoWriter.java
index 302b9ea..60bcd81 100644
--- a/src/test/java/org/apache/doris/kafka/connector/writer/TestCopyIntoWriter.java
+++ b/src/test/java/org/apache/doris/kafka/connector/writer/TestCopyIntoWriter.java
@@ -32,7 +32,6 @@ import java.util.Properties;
 import org.apache.doris.kafka.connector.cfg.DorisOptions;
 import org.apache.doris.kafka.connector.cfg.DorisSinkConnectorConfig;
 import org.apache.doris.kafka.connector.connection.JdbcConnectionProvider;
-import org.apache.doris.kafka.connector.exception.CopyLoadException;
 import org.apache.doris.kafka.connector.metrics.DorisConnectMonitor;
 import org.apache.doris.kafka.connector.writer.load.CopyLoad;
 import org.apache.kafka.connect.sink.SinkRecord;
@@ -67,22 +66,14 @@ public class TestCopyIntoWriter {
 dorisOptions = new DorisOptions((Map) props);
 }
 
-@Test(expected = CopyLoadException.class)
+@Test
 public void fetchOffset() {
-DorisConnectMonitor dorisConnectMonitor = mock(DorisConnectMonitor.class);
-dorisWriter =
-new CopyIntoWriter(
-"test5",
-0,
-dorisOptions,
-new JdbcConnectionProvider(dorisOptions),
-dorisConnectMonitor);
+dorisWriter = mockCopyIntoWriter(new String[] {});
 dorisWriter.fetchOffset();
 Assert.assertEquals(-1l, dorisWriter.getOffsetPersistedInDoris().longValue());
 }
 
-@Test
-public void fetchOffsetTest() {
+private CopyIntoWriter mockCopyIntoWriter(String[] listLoadFiles) {
 DorisConnectMonitor dorisConnectMonitor = mock(DorisConnectMonitor.class);
 CopyIntoWriter copyIntoWriter =
 spy(
@@ -93,7 +84,12 @@ public class TestCopyIntoWriter {
 new JdbcConnectionProvider(dorisOptions),
 dorisConnectMonitor));
 
doReturn(Arrays.asList(listLoadFiles)).when(copyIntoWriter).listLoadFiles();
-dorisWriter = copyIntoWriter;
+return copyIntoWriter;
+}
+
+@Test
+public void fetchOffsetTest() {
+dorisWriter = mockCopyIntoWriter(listLoadFiles);
 dorisWriter.fetchOffset();
 
System.out.println(dorisWriter.getOffsetPersistedInDoris().longValue());
 Assert.assertEquals(168172036, dorisWriter.getOffsetPersistedInDoris().longValue());
diff --git a/src/test/java/org/apache/doris/kafka/connector/writer/TestStreamLoadWriter.java b/src/test/java/org/apache/doris/kafka/connector/writer/TestStreamLoadWriter.java
index 7e44a2d..ea54211 100644
--- a/src/test/java/org/apache/doris/kafka/connector/writer/TestStreamLoadWriter.java
+++ b/src/test/java/org/apache/doris/kafka/connector/writer/TestStreamLoadWriter.java
@@ -36,7 +36,6 @@ import java.util.Properties;
 import org.apache.doris.kafka.connector.cfg.DorisOptions;
 import org.apache.doris.kafka.connector.cfg.DorisSinkConnectorConfig;
 import org.apache.doris.kafka.connector.connection.JdbcConnectionProvider;
-import org.apache.doris.kafka.connector.exception.StreamLoadException;
 import org.apache.doris.kafka.connector.metrics.DorisConnectMonitor;
 import org.apache.doris.kafka.connector.writer.commit.DorisCommittable;
 import org.apache.doris.kafka.connector.writer.load.DorisStreamLoad;
@@ -82,22 +81,14 @@ public class TestStreamLoadWriter {
 "VISIBLE");
 }
 
-@Test(expected = StreamLoadException.class)
+@Test
 public void fetchOffset() {
-DorisConnectMonitor dorisConnectMonitor = mock(DorisConnectMonitor.class);
-dorisWriter =
-new StreamLoadWriter(
-"avro-complex10",
-2,
-dorisOptions,
-new JdbcConnectionProvider(dorisOptions),
-dorisConnectMonitor);
+dorisWriter = mockStreamLoadWriter(new HashMap<>());
 dorisWriter.fetchOffset();
 Assert.assertEquals(-1l, dorisWriter.

Error while running notifications feature from refs/heads/master:.asf.yaml in doris-website!

2024-07-02 Thread Apache Infrastructure


An error occurred while running notifications feature in .asf.yaml!:
Invalid notification target 'comm...@foo.apache.org'. Must be a valid @doris.apache.org list!





(doris) branch branch-2.0 updated: [branch-2.0][improvement](mysql catalog) disable mysql AbandonedConnectionCleanup Thread (#36942)

2024-07-02 Thread zykkk
This is an automated email from the ASF dual-hosted git repository.

zykkk pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
 new aedde4777ff [branch-2.0][improvement](mysql catalog) disable mysql AbandonedConnectionCleanup Thread (#36942)
aedde4777ff is described below

commit aedde4777ffbbdcf16049697daef5995b1a2190e
Author: zy-kkk 
AuthorDate: Tue Jul 2 16:36:46 2024 +0800

[branch-2.0][improvement](mysql catalog) disable mysql AbandonedConnectionCleanup Thread (#36942)

pick (#36655)

When using the MySQL catalog, the MySQL JDBC driver will generate an
`AbandonedConnectionCleanupThread` for each database connection. It holds
phantom references that accumulate over time as database connections are
constantly created, eventually causing OOM. Therefore, in our usage
scenario, we need to turn off this thread, because our database connection
recycling depends on the connection pool. Please note that this switch only
takes effect for MySQL JDBC Driver versions greater than 8.0.22.
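The whole fix hinges on one system property, which must be set before any MySQL
connection is created so the driver never starts the cleanup thread (effective
only for MySQL Connector/J newer than 8.0.22). A minimal sketch; the connection
details are placeholders:

    import java.sql.Connection;
    import java.sql.DriverManager;

    public final class DisableCleanupThreadExample {
        public static void main(String[] args) throws Exception {
            System.setProperty("com.mysql.cj.disableAbandonedConnectionCleanup", "true");
            // Connections created after this point skip the cleanup-thread
            // registration; recycling is left entirely to the connection pool.
            try (Connection conn = DriverManager.getConnection(
                    "jdbc:mysql://127.0.0.1:3306/demo", "user", "pass")) {
                System.out.println(conn.isValid(1));
            }
        }
    }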
---
 .../jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java | 3 +++
 .../java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java  | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java
index 0517786f398..a2ef1936e8f 100644
--- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java
+++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/JdbcExecutor.java
@@ -98,6 +98,9 @@ public class JdbcExecutor {
 throw new InternalException(e.getMessage());
 }
 tableType = request.table_type;
+if (tableType == TOdbcTableType.MYSQL) {
+System.setProperty("com.mysql.cj.disableAbandonedConnectionCleanup", "true");
+}
 this.config = new JdbcDataSourceConfig()
 .setCatalogId(request.catalog_id)
 .setJdbcUser(request.jdbc_user)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java
index cd7f7aece3d..ed39c890f7a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcMySQLClient.java
@@ -44,6 +44,8 @@ public class JdbcMySQLClient extends JdbcClient {
 
 protected JdbcMySQLClient(JdbcClientConfig jdbcClientConfig) {
 super(jdbcClientConfig);
+// Disable abandoned connection cleanup
+System.setProperty("com.mysql.cj.disableAbandonedConnectionCleanup", "true");
 convertDateToNull = isConvertDatetimeToNull(jdbcClientConfig);
 Connection conn = null;
 Statement stmt = null;





(doris) 04/44: [Refactor](Recycler) Refactor azure obj client's batch delete function to check the delete response (#37037)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit ae38c5ed9951930c6b63e1572c045be837f65034
Author: AlexYue 
AuthorDate: Mon Jul 1 02:54:45 2024 +0800

[Refactor](Recycler) Refactor azure obj client's batch delete function to check the delete response (#37037)

This PR uses #36590's new code to refactor batch delete for the Azure obj client.
---
 cloud/src/recycler/azure_obj_client.cpp | 42 +++--
 cloud/test/mock_accessor.cpp|  2 --
 2 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/cloud/src/recycler/azure_obj_client.cpp b/cloud/src/recycler/azure_obj_client.cpp
index 02f906f1cef..60cd79abb95 100644
--- a/cloud/src/recycler/azure_obj_client.cpp
+++ b/cloud/src/recycler/azure_obj_client.cpp
@@ -34,9 +34,12 @@
 #include "common/logging.h"
 #include "common/sync_point.h"
 
+using namespace Azure::Storage::Blobs;
+
 namespace doris::cloud {
 
 static constexpr size_t BlobBatchMaxOperations = 256;
+static constexpr char BlobNotFound[] = "BlobNotFound";
 
 template 
ObjectStorageResponse do_azure_client_call(Func f, std::string_view url, std::string_view key) {
@@ -55,8 +58,7 @@ ObjectStorageResponse do_azure_client_call(Func f, std::string_view url, std::st
 
 class AzureListIterator final : public ObjectListIterator {
 public:
-AzureListIterator(std::shared_ptr client,
-  std::string prefix)
+AzureListIterator(std::shared_ptr client, std::string prefix)
 : client_(std::move(client)), req_({.Prefix = std::move(prefix)}) {
 TEST_SYNC_POINT_CALLBACK("AzureListIterator", &req_);
 }
@@ -116,8 +118,8 @@ public:
 }
 
 private:
-std::shared_ptr client_;
-Azure::Storage::Blobs::ListBlobsOptions req_;
+std::shared_ptr client_;
+ListBlobsOptions req_;
 std::vector results_;
 bool is_valid_ {true};
 bool has_more_ {true};
@@ -181,14 +183,35 @@ ObjectStorageResponse AzureObjClient::delete_objects(const std::string& bucket,
 TEST_SYNC_POINT_CALLBACK("AzureObjClient::delete_objects", &batch_size);
 std::advance(chunk_end, std::min(batch_size, static_cast(std::distance(begin, end))));
+std::vector> deferred_resps;
+deferred_resps.reserve(std::distance(begin, chunk_end));
 for (auto it = begin; it != chunk_end; ++it) {
-batch.DeleteBlob(*it);
+deferred_resps.emplace_back(batch.DeleteBlob(*it));
 }
 auto resp = do_azure_client_call([&]() { client_->SubmitBatch(batch); }, client_->GetUrl(), *begin);
 if (resp.ret != 0) {
 return resp;
 }
+for (auto&& defer : deferred_resps) {
+try {
+auto r = defer.GetResponse();
+if (!r.Value.Deleted) {
+LOG_INFO("Azure batch delete failed, url {}", client_->GetUrl());
+return {-1};
+}
+} catch (Azure::Storage::StorageException& e) {
+if (Azure::Core::Http::HttpStatusCode::NotFound == e.StatusCode &&
+0 == strcmp(e.ErrorCode.c_str(), BlobNotFound)) {
+continue;
+}
+auto msg = fmt::format(
+"Azure request failed because {}, http code {}, request id {}, url {}",
+e.Message, static_cast(e.StatusCode), e.RequestId, client_->GetUrl());
+LOG_WARNING(msg);
+return {-1, std::move(msg)};
+}
+}
 
 begin = chunk_end;
 }
@@ -197,8 +220,13 @@ ObjectStorageResponse AzureObjClient::delete_objects(const std::string& bucket,
 }
 
ObjectStorageResponse AzureObjClient::delete_object(ObjectStoragePathRef path) {
-return do_azure_client_call([&]() { client_->DeleteBlob(path.key); }, client_->GetUrl(),
-path.key);
+return do_azure_client_call(
+[&]() {
+if (auto r = client_->DeleteBlob(path.key); !r.Value.Deleted) {
+throw std::runtime_error("Delete azure blob failed");
+}
+},
+client_->GetUrl(), path.key);
 }
 
 ObjectStorageResponse AzureObjClient::delete_objects_recursively(ObjectStoragePathRef path,
diff --git a/cloud/test/mock_accessor.cpp b/cloud/test/mock_accessor.cpp
index 9746e64fd30..77a8afe24b1 100644
--- a/cloud/test/mock_accessor.cpp
+++ b/cloud/test/mock_accessor.cpp
@@ -18,8 +18,6 @@
 
 #include "mock_accessor.h"
 
-#include 
-#include 
 #include 
 
 #include 





(doris) 10/44: [fix](ES Catalog)Add array types support in esquery function (#36936)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit ec1422aa9a025f0b77a1d921d93023e2cd6f4bb2
Author: qiye 
AuthorDate: Mon Jul 1 10:46:43 2024 +0800

[fix](ES Catalog)Add array types support in esquery function (#36936)

Support array types in `esquery` function, and add some tests.
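What the change enables from SQL, sketched over JDBC: esquery() can now take an
array-typed column as its first argument. Catalog, table, and column names are
placeholders; the second argument is an Elasticsearch query DSL fragment:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public final class EsQueryArrayExample {
        public static void main(String[] args) throws Exception {
            try (Connection conn = DriverManager.getConnection(
                         "jdbc:mysql://127.0.0.1:9030/", "root", "");
                 Statement stmt = conn.createStatement();
                 ResultSet rs = stmt.executeQuery(
                         "SELECT * FROM es_catalog.test.tbl WHERE esquery(my_array_col, "
                         + "'{\"terms\": {\"my_array_col\": [1, 2]}}')")) {
                while (rs.next()) {
                    System.out.println(rs.getString(1));
                }
            }
        }
    }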
---
 .../expressions/functions/scalar/EsQuery.java  |  4 ++-
 gensrc/script/doris_builtins_functions.py  |  2 +-
 .../data/external_table_p0/es/test_es_query.out| 42 +++---
 .../external_table_p0/es/test_es_query.groovy  | 10 --
 4 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EsQuery.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EsQuery.java
index a5fbd339c9f..28a6988bca4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EsQuery.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EsQuery.java
@@ -25,6 +25,7 @@ import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.BooleanType;
 import org.apache.doris.nereids.types.VarcharType;
+import org.apache.doris.nereids.types.coercion.AnyDataType;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
@@ -38,7 +39,8 @@ public class EsQuery extends ScalarFunction
 implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable {
 
 public static final List SIGNATURES = ImmutableList.of(
-FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT)
+FunctionSignature.ret(BooleanType.INSTANCE).args(AnyDataType.INSTANCE_WITHOUT_INDEX,
+VarcharType.SYSTEM_DEFAULT)
 );
 
 /**
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 1ce8127f17e..81c502d301d 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1582,7 +1582,7 @@ visible_functions = {
 [['esquery'], 'BOOLEAN', ['DATEV2', 'VARCHAR'], ''],
 [['esquery'], 'BOOLEAN', ['DATETIMEV2', 'VARCHAR'], ''],
 [['esquery'], 'BOOLEAN', ['TIMEV2', 'VARCHAR'], ''],
-[['esquery'], 'BOOLEAN', ['ARRAY', 'VARCHAR'], ''],
+[['esquery'], 'BOOLEAN', ['ARRAY', 'VARCHAR'], '', ['T']],
 [['esquery'], 'BOOLEAN', ['MAP', 'VARCHAR'], ''],
 [['esquery'], 'BOOLEAN', ['STRING', 'VARCHAR'], ''],
 [['esquery'], 'BOOLEAN', ['VARIANT', 'VARCHAR'], ''],
diff --git a/regression-test/data/external_table_p0/es/test_es_query.out b/regression-test/data/external_table_p0/es/test_es_query.out
index 605e2f1aa93..d751719389f 100644
--- a/regression-test/data/external_table_p0/es/test_es_query.out
+++ b/regression-test/data/external_table_p0/es/test_es_query.out
@@ -1,9 +1,9 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
 -- !sql01 --
-["2020-01-01", "2020-01-02"]   [-1, 0, 1, 2]   [0, 1, 2, 3]["d", "e", "f"] 
[128, 129, -129, -130]  ["192.168.0.1", "127.0.0.1"]string1 [1, 2, 3, 4]
2022-08-08  2022-08-08T12:10:10 text#1  ["2020-01-01", "2020-01-02"]
3.14[1, 2, 3, 4][1, 1.1, 1.2, 1.3]  [1, 2, 3, 4]["a", "b", "c"] 
["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] 2022-08-08T12:10:10 
2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4]  [1, 0, 1, 1]
[32768, 32769, -32769, -32770]  \N
+["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2]   [0, 1, 2, 3]
["d", "e", "f"] [128, 129, -129, -130]  ["192.168.0.1", "127.0.0.1"]string1 
[1, 2, 3, 4]2022-08-08  2022-08-08T12:10:10 text#1  ["2020-01-01", 
"2020-01-02"]3.14[1, 2, 3, 4][1, 1.1, 1.2, 1.3]  [1, 2, 3, 4]   
 ["a", "b", "c"] ["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] 
2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, 
-3, 4]  [1, 0, 1, 1][32768, 32769, -32769, -32770]  \N
 
 -- !sql02 --
-["2020-01-01", "2020-01-02"]   [-1, 0, 1, 2]   [0, 1, 2, 3]["d", "e", "f"] 
[128, 129, -129, -130]  ["192.168.0.1", "127.0.0.1"]string1 [1, 2, 3, 4]
2022-08-08  2022-08-08T12:10:10 text#1  ["2020-01-01", "2020-01-02"]
3.14[1, 2, 3, 4][1, 1.1, 1.2, 1.3]  [1, 2, 3, 4]["a", "b", "c"] 
["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] 2022-08-08T12:10:10 
2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4]  [1, 0, 1, 1]
[32768, 32769, -32769, -32770]  \N
+["2020-01-01 12:00:00", "2020-01-02 13:01:0

(doris) 19/44: [opt](arena) lazy memory allocation in arena (#36498)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 5c5261fc5315ff3af30b2eb81080471c023227bc
Author: zhiqiang 
AuthorDate: Mon Jul 1 14:57:38 2024 +0800

[opt](arena) lazy memory allocation in arena (#36498)

Arena should not allocate memory in its constructor.

After this PR is merged, we should revert
https://github.com/apache/doris/pull/36299
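The core idea, as a minimal Java sketch: the first chunk is allocated on first
use instead of in the constructor, so arenas that are created but never written
to cost nothing. Names are illustrative, and growth/chunk chaining from the C++
version is omitted:

    public final class LazyArenaSketch {
        private byte[] head;            // first chunk, allocated lazily
        private int pos;                // bump pointer within the head chunk
        private final int initialSize;

        public LazyArenaSketch(int initialSize) {
            this.initialSize = initialSize; // the constructor allocates nothing
        }

        private void initHeadIfNeeded() {
            if (head == null) {
                head = new byte[initialSize];
            }
        }

        // Reserves `size` bytes and returns their offset within the head chunk.
        public int alloc(int size) {
            initHeadIfNeeded();
            if (pos + size > head.length) {
                throw new IllegalStateException("chunk growth omitted in this sketch");
            }
            int res = pos;
            pos += size;
            return res;
        }

        public int usedSize() {
            return head == null ? 0 : pos;
        }
    }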
---
 be/src/vec/common/arena.h | 59 +--
 1 file changed, 47 insertions(+), 12 deletions(-)

diff --git a/be/src/vec/common/arena.h b/be/src/vec/common/arena.h
index 4ab3ee4c606..65e8c1dfabe 100644
--- a/be/src/vec/common/arena.h
+++ b/be/src/vec/common/arena.h
@@ -84,20 +84,22 @@ private:
 size_t used() const { return pos - begin; }
 };
 
-size_t growth_factor;
-size_t linear_growth_threshold;
+size_t growth_factor = 2;
+size_t linear_growth_threshold = 128 * 1024 * 1024;
 
 /// Last contiguous chunk of memory.
 Chunk* head = nullptr;
-size_t size_in_bytes;
+size_t size_in_bytes = 0;
+size_t _initial_size = 4096;
 // The memory used by all chunks, excluding head.
-size_t _used_size_no_head;
+size_t _used_size_no_head = 0;
 
 static size_t round_up_to_page_size(size_t s) { return (s + 4096 - 1) / 4096 * 4096; }
 
 /// If chunks size is less than 'linear_growth_threshold', then use exponential growth, otherwise - linear growth
 ///  (to not allocate too much excessive memory).
-size_t next_size(size_t min_next_size) const {
+size_t next_size(size_t min_next_size) {
+DCHECK(head != nullptr);
 size_t size_after_grow = 0;
 
 if (head->size() < linear_growth_threshold) {
@@ -120,12 +122,20 @@ private:
 }
 
 /// Add next contiguous chunk of memory with size not less than specified.
-void NO_INLINE add_chunk(size_t min_size) {
+void NO_INLINE _add_chunk(size_t min_size) {
+DCHECK(head != nullptr);
 _used_size_no_head += head->used();
 head = new Chunk(next_size(min_size + pad_right), head);
 size_in_bytes += head->size();
 }
 
+void _init_head_if_needed() {
+if (UNLIKELY(head == nullptr)) {
+head = new Chunk(_initial_size, nullptr);
+size_in_bytes += head->size();
+}
+}
+
 friend class ArenaAllocator;
 template 
 friend class AlignedArenaAllocator;
@@ -135,15 +145,18 @@ public:
   size_t linear_growth_threshold_ = 128 * 1024 * 1024)
 : growth_factor(growth_factor_),
   linear_growth_threshold(linear_growth_threshold_),
-  head(new Chunk(initial_size_, nullptr)),
-  size_in_bytes(head->size()),
+  _initial_size(initial_size_),
   _used_size_no_head(0) {}
 
 ~Arena() { delete head; }
 
 /// Get piece of memory, without alignment.
 char* alloc(size_t size) {
-if (UNLIKELY(head->pos + size > head->end)) add_chunk(size);
+_init_head_if_needed();
+
+if (UNLIKELY(head->pos + size > head->end)) {
+_add_chunk(size);
+}
 
 char* res = head->pos;
 head->pos += size;
@@ -153,6 +166,8 @@ public:
 
 /// Get piece of memory with alignment
 char* aligned_alloc(size_t size, size_t alignment) {
+_init_head_if_needed();
+
 do {
 void* head_pos = head->pos;
 size_t space = head->end - head->pos;
@@ -165,7 +180,7 @@ public:
 return res;
 }
 
-add_chunk(size + alignment);
+_add_chunk(size + alignment);
 } while (true);
 }
 
@@ -180,6 +195,8 @@ public:
  * the allocation it intended to roll back was indeed the last one.
   */
 void* rollback(size_t size) {
+DCHECK(head != nullptr);
+
 head->pos -= size;
 ASAN_POISON_MEMORY_REGION(head->pos, size + pad_right);
 return head->pos;
@@ -208,6 +225,8 @@ public:
 return result;
 }
 
+DCHECK(head != nullptr);
+
 // Extend an existing memory range with 'additional_bytes'.
 
 // This method only works for extending the last allocation. For lack of
@@ -291,6 +310,10 @@ public:
 * and only 128M can be reused when you apply for 4G memory again.
 */
 void clear() {
+if (head == nullptr) {
+return;
+}
+
 if (head->prev) {
 delete head->prev;
 head->prev = nullptr;
@@ -303,9 +326,21 @@ public:
 /// Size of chunks in bytes.
 size_t size() const { return size_in_bytes; }
 
-size_t used_size() const { return _used_size_no_head + head->used(); }
+size_t used_size() const {
+if (head == nullptr) {
+return _used_size_no_head;
+}
+
+return _used_size_no_head + head->used();
+}
+
+  

(doris) branch branch-3.0 updated (3df52cbf761 -> 9e4ca47e171)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a change to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


from 3df52cbf761 [improvement](segmentcache) limit segment cache by memory or segment num (#37026)
 new ee164dcb865 [Fix](autoinc) Handle the processing of auto_increment column on exchange node rather than on TabletWriter when using `TABLET_SINK_SHUFFLE_PARTITIONED` (#36836)
 new 8be0ce48007 [streamload](2pc) Fix 2pc stream load txn in cloud mode (#37033)
 new d85a6963e15 [enhance](recycler) Refactor Accessor for Recycler (#36590)
 new ae38c5ed995 [Refactor](Recycler) Refactor azure obj client's batch delete function to check the delete response (#37037)
 new 1ed52cdeb8c [Exec](agg) Fix agg limit result error (#37025)
 new 29cb612a4d6 [refactor](spill) unify the entry point of spill tasks (#37020)
 new f0c10fa57a8 [chore](Regression) Remove useless get provider code in regression framework (#37000)
 new 145afabedc8 [regression-test](connector) Add a case for the response of streamload that the connector depends (#36864)
 new caac4b7cdb7 [fix](local shuffle) Fix wrong partitioned expr in local exchanger (#37017)
 new ec1422aa9a0 [fix](ES Catalog)Add array types support in esquery function (#36936)
 new 51bf9ab1328 [regression-test](case) modify statistics table name (#36689)
 new 6248c68f3cf [feature](function) support ip functions named ipv4_to_ipv6 and cut_ipv6 (#36883)
 new fe21f742813 [opt](hive) save hive table schema in transaction (#37008)
 new 9d61bebee69 [opt](function)avoid virtual function calls in geo functions (#37003)
 new d29764be14c [fix](cloud) Allow access to MS during the replay (#37053)
 new 0c7dbe46a6d [fix](cloud) Update mtime only if partitions have updated time (#37055)
 new 82095706765 [feat](Nereids) Optimize query by pushing down aggregation through join on foreign key (#36035)
 new 19774867822 [Migrate-Test](multi-catalog) Migrate p2 tests from p2 to p0. (#36989)
 new 5c5261fc531 [opt](arena) lazy memory allocation in arena (#36498)
 new cefa74dd893 [improvement](jdbc catalog)Rename config from `disable_jdbc_sqlserver_encrypt` to `force_sqlserver_jdbc_encrypt_false` (#37015)
 new c97839a240f [Fix](regression) Fix p0 case `test_unique_table_auto_inc_concurrent` (#37048)
 new 4bd260a7dc2 [test](migrate) move 2 cases from p2 to p0 (#37004)
 new 97f2da58331 [opt](catalog) add some profile for parquet reader and change meta cache config (#37040)
 new ffbfe6b1138 [fix](Nereids) simplify window expression should inherit data type (#37061)
 new ee516656cf4 [profile](fe)update format of min/hour/sec unit in profile (#37010)
 new 9cd6667e19b [fix](nereids) ColStatsMeta.partitionUpdateRows npe (#37044)
 new fc78d1fd288 [feat](nereids) support explain delete from clause (#36782)
 new a4001b3a92c [fix](map)fix upgrade behavior from 1.2 version (#36937)
 new f5eb1db5b86 [feature](nereids)use mtmv to match legacy mv (#33699)
 new 9126492697c [improve](json)improve json support empty keys (#36762)
 new f5de3d59e06 [fix](stmt) fix show create table consistency (#37074)
 new 5f262d610a3 [fix](auth)fix mtmv name to resolve conflicts (#36958)
 new cabad36926b [fix](mtmv)Fix mtmv name to resolve conflicts in regression test (#36902)
 new e1c8d50b2fd [fix](function) fix nereids fold constant wrong result of abs (#37065)
 new 6563f258ee9 [opt](split) add max wait time of getting splits (#36843)
 new 6c602c803c9 [test](tvf) move p2 tvf tests from p2 to p0 (#37081)
 new 538a885697c [test](migrate) move test_hive_text_complex_type from p2 to p0 (#37007)
 new 2f8298e23be [fix](parquet) prevent parquet page reader print much warning logs (#37011)
 new e5d7eea453d [fix](regression) fix txn_insert case (#36892)
 new a7ba1b0eef8 [fix](routine-load) fix auto resume invalid when FE leader change (#37071)
 new 1ef9ab1453f [fix](merge-on-write) when full clone failed, duplicate 
key might occur (#37001)
 new 2206acececb [Fix]add set thread num config for wg flush pool (#37028)
 new 597bfebe2de [fix](nereids) fix This stopwatch is already running 
(#37095)
 new 9e4ca47e171 [fix](recycler) Fix DCHECK in ObjectListIterator (#37112)

The 44 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/cloud/cloud_meta_mgr.cpp|6 +-
 be/src/cloud/cloud_stream_load_executor.cpp|   69 +-
 be/src/common/config.cpp   |   10 +-
 be/src/common/config.h |6 +
 be/src/olap/delta_writer_v2.cpp|2 +-
 be/src/olap/rowset/segment_v2/column_reader.cpp   

(doris) 15/44: [fix](cloud) Allow access to MS during the replay (#37053)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit d29764be14cc3c0411a695c2cc2e3f6d2c635c8d
Author: walter 
AuthorDate: Mon Jul 1 14:08:01 2024 +0800

[fix](cloud) Allow access to MS during the replay (#37053)

In some metadata designs, the process involves writing to the edit log
first and then calling RPC to delete the data. The latter might fail, so
it is reasonable to continue calling RPC to delete the data during
replay. PR #36856 argues that not calling RPC in the checkpoint thread
is problematic.
---
 .../main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java| 4 
 .../src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java  | 6 --
 2 files changed, 10 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java 
b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java
index 373e644ec9e..2fe3d748602 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java
@@ -112,10 +112,6 @@ public class CloudSchemaChangeJobV2 extends 
SchemaChangeJobV2 {
 return;
 }
 
-if (Env.isCheckpointThread()) {
-return;
-}
-
 List shadowIdxList = 
indexIdMap.keySet().stream().collect(Collectors.toList());
 dropIndex(shadowIdxList);
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java
index b321b6cffc5..00f271099e4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java
@@ -17,7 +17,6 @@
 
 package org.apache.doris.cloud.rpc;
 
-import org.apache.doris.catalog.Env;
 import org.apache.doris.cloud.proto.Cloud;
 import org.apache.doris.common.Config;
 import org.apache.doris.rpc.RpcException;
@@ -101,11 +100,6 @@ public class MetaServiceProxy {
 }
 
 private MetaServiceClient getProxy() {
-if (Env.isCheckpointThread()) {
-LOG.error("You should not use RPC in the checkpoint thread");
-throw new RuntimeException("use RPC in the checkpoint thread");
-}
-
 if (Config.enable_check_compatibility_mode) {
 LOG.error("Should not use RPC in check compatibility mode");
 throw new RuntimeException("use RPC in the check compatibility 
mode");


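The pattern the message describes (write the edit log first, then call the RPC, and issue the same RPC again when the log entry is replayed) can be sketched as follows. This is a minimal, hypothetical illustration; `MetaService` and `EditLog` are stand-ins for the real Doris classes, and the only assumption is that the delete RPC is idempotent so repeating it on replay is safe:

```java
import java.util.List;

public class ReplayRpcDemo {
    // Hypothetical stand-in for the meta-service client; assumed idempotent.
    interface MetaService {
        void dropIndex(List<Long> idxIds);
    }

    // Hypothetical stand-in for the FE edit log.
    static class EditLog {
        void logDropIndex(List<Long> idxIds) {
            System.out.println("logged dropIndex " + idxIds);
        }
    }

    private final EditLog editLog = new EditLog();
    private final MetaService ms;

    ReplayRpcDemo(MetaService ms) {
        this.ms = ms;
    }

    // Normal path: persist the intent first, then call the RPC, which may fail.
    void dropIndexes(List<Long> idxIds) {
        editLog.logDropIndex(idxIds);
        ms.dropIndex(idxIds);
    }

    // Replay path: the log entry proves the intent, so the RPC is issued again.
    // This is why the replay (and checkpoint) thread must be allowed to call MS.
    void replayDropIndexes(List<Long> idxIds) {
        ms.dropIndex(idxIds);
    }

    public static void main(String[] args) {
        ReplayRpcDemo demo = new ReplayRpcDemo(ids -> System.out.println("MS dropIndex " + ids));
        demo.dropIndexes(List.of(42L));
        demo.replayDropIndexes(List.of(42L)); // safe because the RPC is idempotent
    }
}
```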



(doris) 31/44: [fix](stmt) fix show create table consistency (#37074)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit f5de3d59e0673b1f95cefc5d0f7f279562f46418
Author: zclllyybb 
AuthorDate: Mon Jul 1 20:18:54 2024 +0800

[fix](stmt) fix show create table consistency (#37074)
---
 fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java
index 29f99b219aa..6bf11b0a953 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java
@@ -271,7 +271,7 @@ public class SlotRef extends Expr {
 // virtual slot of an alias function
 // when we try to translate an alias function to Nereids style, 
the desc in the place holding slotRef
 // is null, and we just need the name of col.
-return col;
+return "`" + col + "`";
 } else if (desc.getSourceExprs() != null) {
 if (!disableTableName && (ToSqlContext.get() == null || 
ToSqlContext.get().isNeedSlotRefId())) {
 if (desc.getId().asInt() != 1) {


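The one-line fix above always backtick-quotes the bare column name, so that the SHOW CREATE TABLE output stays parseable even when a column name collides with a keyword. A minimal sketch of the quoting rule (the helper name is hypothetical):

```java
public class QuoteDemo {
    // Hypothetical helper mirroring the one-line fix: always backtick-quote
    // the bare column name when the slot has no descriptor.
    static String quote(String col) {
        return "`" + col + "`";
    }

    public static void main(String[] args) {
        // A column named after a keyword would otherwise break the recreated DDL.
        System.out.println("SELECT " + quote("order") + " FROM t");
        // prints: SELECT `order` FROM t
    }
}
```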



(doris) 16/44: [fix](cloud) Update mtime only if partitions have updated time (#37055)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 0c7dbe46a6d6246e2c8673ff771ea46647052c8e
Author: Gavin Chou 
AuthorDate: Mon Jul 1 14:08:43 2024 +0800

[fix](cloud) Update mtime only if partitions have updated time (#37055)

This PR fixes the incompatibility introduced by #34615
---
 .../src/main/java/org/apache/doris/cloud/catalog/CloudPartition.java  | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudPartition.java 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudPartition.java
index 1246c5b640b..882bb7f6933 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudPartition.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudPartition.java
@@ -205,7 +205,9 @@ public class CloudPartition extends Partition {
 for (int i = 0; i < size; ++i) {
 Long version = versions.get(i);
 if (version > Partition.PARTITION_INIT_VERSION) {
-partitions.get(i).setCachedVisibleVersion(versions.get(i), 
versionUpdateTimesMs.get(i));
+// For compatibility, the existing partitions may not have 
mtime
+long mTime = versions.size() == versionUpdateTimesMs.size() ? 
versionUpdateTimesMs.get(i) : 0;
+partitions.get(i).setCachedVisibleVersion(versions.get(i), 
mTime);
 }
 }
 





(doris) 20/44: [improvement](jdbc catalog)Rename config from `disable_jdbc_sqlserver_encrypt` to `force_sqlserver_jdbc_encrypt_false` (#37015)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit cefa74dd893e9ed3468a66e117f7c297712e9a46
Author: zy-kkk 
AuthorDate: Mon Jul 1 15:01:05 2024 +0800

[improvement](jdbc catalog)Rename config from 
`disable_jdbc_sqlserver_encrypt` to `force_sqlserver_jdbc_encrypt_false` 
(#37015)

Front #36659
Renamed the configuration parameter `disable_jdbc_sqlserver_encrypt` to
`force_sqlserver_jdbc_encrypt_false` to make its purpose clearer and
more explicit. This new name better indicates that the parameter forces
the JDBC URL to set `encrypt=false` when enabled.
---
 fe/fe-common/src/main/java/org/apache/doris/common/Config.java  | 6 +++---
 fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java 
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 44c63c88872..c4340d783ef 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -162,9 +162,9 @@ public class Config extends ConfigBase {
 "MySQL Jdbc Catalog mysql does not support pushdown functions"})
 public static String[] jdbc_mysql_unsupported_pushdown_functions = 
{"date_trunc", "money_format", "negative"};
 
-@ConfField(description = {"SQLServer Jdbc Catalog 关闭加密",
-"SQLServer Jdbc Catalog close encrypt"})
-public static boolean disable_jdbc_sqlserver_encrypt = false;
+@ConfField(description = {"强制 SQLServer Jdbc Catalog 加密为 false",
+"Force SQLServer Jdbc Catalog encrypt to false"})
+public static boolean force_sqlserver_jdbc_encrypt_false = false;
 
 @ConfField(mutable = true, masterOnly = true, description = {"broker load 
时,单个节点上 load 执行计划的默认并行度",
 "The default parallelism of the load execution plan on a single 
node when the broker load is submitted"})
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java
index 1db801b024a..e9108076310 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcResource.java
@@ -361,7 +361,7 @@ public class JdbcResource extends Resource {
 newJdbcUrl = checkAndSetJdbcBoolParam(dbType, newJdbcUrl, 
"reWriteBatchedInserts", "false", "true");
 }
 if (dbType.equals(SQLSERVER)) {
-if (Config.disable_jdbc_sqlserver_encrypt) {
+if (Config.force_sqlserver_jdbc_encrypt_false) {
 newJdbcUrl = checkAndSetJdbcBoolParam(dbType, newJdbcUrl, 
"encrypt", "true", "false");
 }
 newJdbcUrl = checkAndSetJdbcBoolParam(dbType, newJdbcUrl, 
"useBulkCopyForBatchInsert", "false", "true");


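A minimal sketch of the intended effect on a SQLServer JDBC URL. The helper below is hypothetical; the real code path goes through JdbcResource.checkAndSetJdbcBoolParam as shown in the diff:

```java
public class JdbcUrlDemo {
    // Hypothetical helper mirroring force_sqlserver_jdbc_encrypt_false:
    // rewrite encrypt=true to encrypt=false, or add encrypt=false if absent.
    static String forceEncryptFalse(String url) {
        if (url.contains("encrypt=true")) {
            return url.replace("encrypt=true", "encrypt=false");
        }
        if (!url.contains("encrypt=")) {
            return url + ";encrypt=false";
        }
        return url; // already encrypt=false
    }

    public static void main(String[] args) {
        String url = "jdbc:sqlserver://host:1433;databaseName=db;encrypt=true";
        System.out.println(forceEncryptFalse(url));
        // prints: jdbc:sqlserver://host:1433;databaseName=db;encrypt=false
    }
}
```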



(doris) 17/44: [feat](Nereids) Optimize query by pushing down aggregation through join on foreign key (#36035)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 820957067654b8f3784cc2100ef0dd9270b4f31f
Author: 谢健 
AuthorDate: Mon Jul 1 14:37:23 2024 +0800

[feat](Nereids) Optimize query by pushing down aggregation through join on 
foreign key (#36035)

## Proposed changes

This PR optimizes query performance by pushing down aggregations through
joins when grouped by a foreign key. This adjustment reduces data
processing overhead above the join, improving both speed and resource
efficiency.

Transformation Example:

Before Optimization:
```
Aggregation(group by fk)
 |
   Join(pk = fk)
   /  \
  pk  fk
```
After Optimization:
```
 Join(pk = fk)
 / \
pk  Aggregation(group by fk)
   |
  fk
```
---
 .../doris/nereids/jobs/executor/Rewriter.java  |   6 +-
 .../apache/doris/nereids/properties/FuncDeps.java  |  19 ++
 .../org/apache/doris/nereids/rules/RuleType.java   |   2 +-
 .../rewrite/PushDownAggThroughJoinOnPkFk.java  | 348 +
 .../rewrite/PushDownAggThroughJoinOnPkFkTest.java  | 158 ++
 .../shape/query38.out  |  51 ++-
 .../shape/query87.out  |  51 ++-
 .../noStatsRfPrune/query38.out |  51 ++-
 .../noStatsRfPrune/query87.out |  51 ++-
 .../no_stats_shape/query38.out |  51 ++-
 .../no_stats_shape/query87.out |  51 ++-
 .../rf_prune/query38.out   |  51 ++-
 .../rf_prune/query87.out   |  51 ++-
 .../nereids_tpcds_shape_sf100_p0/shape/query38.out |  51 ++-
 .../nereids_tpcds_shape_sf100_p0/shape/query87.out |  51 ++-
 15 files changed, 770 insertions(+), 273 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index 9505bdca87d..0a2906ca055 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -110,6 +110,7 @@ import 
org.apache.doris.nereids.rules.rewrite.PushConjunctsIntoEsScan;
 import org.apache.doris.nereids.rules.rewrite.PushConjunctsIntoJdbcScan;
 import org.apache.doris.nereids.rules.rewrite.PushConjunctsIntoOdbcScan;
 import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoin;
+import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoinOnPkFk;
 import org.apache.doris.nereids.rules.rewrite.PushDownAggThroughJoinOneSide;
 import org.apache.doris.nereids.rules.rewrite.PushDownDistinctThroughJoin;
 import org.apache.doris.nereids.rules.rewrite.PushDownFilterThroughProject;
@@ -348,8 +349,9 @@ public class Rewriter extends AbstractBatchJobExecutor {
 ),
 
 // this rule should be invoked after topic "Join pull up"
-topic("eliminate group by keys according to fd items",
-topDown(new EliminateGroupByKey())
+topic("eliminate Aggregate according to fd items",
+topDown(new EliminateGroupByKey()),
+topDown(new PushDownAggThroughJoinOnPkFk())
 ),
 
 topic("Limit optimization",
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java
index c17fd2eee57..be7b0853605 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java
@@ -62,6 +62,7 @@ public class FuncDeps {
 }
 
 private final Set<FuncDepsItem> items;
+// determinants -> dependencies
 private final Map<Set<Slot>, Set<Set<Slot>>> edges;
 
 public FuncDeps() {
@@ -159,6 +160,24 @@ public class FuncDeps {
 return items.contains(new FuncDepsItem(dominate, dependency));
 }
 
+public boolean isCircleDeps(Set<Slot> dominate, Set<Slot> dependency) {
+return items.contains(new FuncDepsItem(dominate, dependency))
+&& items.contains(new FuncDepsItem(dependency, dominate));
+}
+
+/**
+ * find the determinants of dependencies
+ */
+public Set<Set<Slot>> findDeterminats(Set<Slot> dependency) {
+Set<Set<Slot>> determinants = new HashSet<>();
+for (FuncDepsItem item : items) {
+if (item.dependencies.equals(dependency)) {
+determinants.add(item.determinants);
+}
+}
+return determinants;
+}
+
 @Override
 public String toString() {
 return items.toString();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/Ru

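To make the FuncDeps additions above concrete, here is a self-contained sketch of the same idea using plain strings instead of Nereids slots; `Item` is a hypothetical stand-in for FuncDepsItem. A join condition pk = fk records dependencies in both directions, and that circular dependency is what makes the two sides one-to-one, which in turn allows the aggregation to be pushed below the join:

```java
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class FuncDepsDemo {
    // Hypothetical stand-in for FuncDepsItem: determinants -> dependencies.
    record Item(Set<String> determinants, Set<String> dependencies) {}

    // Mirrors the intent of findDeterminats: collect every determinant set
    // recorded for the given dependency set.
    static Set<Set<String>> findDeterminants(List<Item> items, Set<String> dependency) {
        Set<Set<String>> result = new HashSet<>();
        for (Item item : items) {
            if (item.dependencies().equals(dependency)) {
                result.add(item.determinants());
            }
        }
        return result;
    }

    public static void main(String[] args) {
        // pk = fk yields pk -> fk and fk -> pk, i.e. a circular dependency.
        List<Item> items = List.of(
                new Item(Set.of("pk"), Set.of("fk")),
                new Item(Set.of("fk"), Set.of("pk")));
        System.out.println(findDeterminants(items, Set.of("pk"))); // [[fk]]
    }
}
```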
(doris) 24/44: [fix](Nereids) simplify window expression should inherit data type (#37061)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit ffbfe6b1138880aa01f7a596cc0f426de671725a
Author: morrySnow <101034200+morrys...@users.noreply.github.com>
AuthorDate: Mon Jul 1 16:02:38 2024 +0800

[fix](Nereids) simplify window expression should inherit data type (#37061)

Introduced by #33647.

After a window expression is rewritten to a literal, the literal's data
type should be the same as that of the original window expression.
---
 .../doris/nereids/rules/rewrite/SimplifyWindowExpression.java | 4 +++-
 .../simplify_window_expression/simplify_window_expression.out | 8 
 .../simplify_window_expression/simplify_window_expression.groovy  | 7 +--
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java
index 0db1d59b9e6..311fe57cf16 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SimplifyWindowExpression.java
@@ -22,6 +22,7 @@ import org.apache.doris.nereids.pattern.MatchingContext;
 import org.apache.doris.nereids.rules.Rule;
 import org.apache.doris.nereids.rules.RuleType;
 import org.apache.doris.nereids.trees.expressions.Alias;
+import org.apache.doris.nereids.trees.expressions.Cast;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.NamedExpression;
 import org.apache.doris.nereids.trees.expressions.Slot;
@@ -91,7 +92,8 @@ public class SimplifyWindowExpression extends 
OneRewriteRuleFactory {
 String name = ((BoundFunction) function).getName();
 if ((name.equals(COUNT) && checkCount((Count) boundFunction))
 || REWRRITE_TO_CONST_WINDOW_FUNCTIONS.contains(name)) {
-projectionsBuilder.add(new Alias(alias.getExprId(), new 
TinyIntLiteral((byte) 1), alias.getName()));
+projectionsBuilder.add(new Alias(alias.getExprId(),
+new Cast(new TinyIntLiteral((byte) 1), 
function.getDataType()), alias.getName()));
 } else if (REWRRITE_TO_SLOT_WINDOW_FUNCTIONS.contains(name)) {
 projectionsBuilder.add(new Alias(alias.getExprId(),
 
TypeCoercionUtils.castIfNotSameType(boundFunction.child(0), 
boundFunction.getDataType()),
diff --git 
a/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out
 
b/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out
index e660cd7702c..7c72e1c31e7 100644
--- 
a/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out
+++ 
b/regression-test/data/nereids_rules_p0/simplify_window_expression/simplify_window_expression.out
@@ -265,3 +265,11 @@ PhysicalResultSink
 --filter((mal_test_simplify_window.__DORIS_DELETE_SIGN__ = 0))
 PhysicalOlapScan[mal_test_simplify_window]
 
+-- !check_output_type --
+\N 1   1
+1  1   1
+2  1   1
+3  1   1
+4  1   1
+6  1   1
+
diff --git 
a/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy
 
b/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy
index 10c76049e8e..252116e6314 100644
--- 
a/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy
+++ 
b/regression-test/suites/nereids_rules_p0/simplify_window_expression/simplify_window_expression.groovy
@@ -16,8 +16,6 @@
 // under the License.
 
 suite("simplify_window_expression") {
-sql "SET enable_nereids_planner=true"
-sql "SET enable_fallback_to_original_planner=false"
 sql "set enable_parallel_result_sink=false;"
 sql """
   DROP TABLE IF EXISTS mal_test_simplify_window
@@ -111,4 +109,9 @@ suite("simplify_window_expression") {
 select a, rank() over (partition by a order by sum(b) desc) as ranking
 from mal_test_simplify_window group by a;
 """
+
+order_qt_check_output_type """
+select * from ( select a, rank() over (partition by a order by sum(b) 
desc) as ranking
+from mal_test_simplify_window group by a) t, (select 1 a) t2 where 
t.ranking = t2.a
+"""
 }
\ No newline at end of file





(doris) 33/44: [fix](mtmv)Fix mtmv name to resolve conflicts in regression test (#36902)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit cabad36926bc574bc2c25481e4a73d99e5f105d7
Author: zfr95 <87513668+zfr9...@users.noreply.github.com>
AuthorDate: Mon Jul 1 20:27:17 2024 +0800

[fix](mtmv)Fix mtmv name to resolve conflicts in regression test (#36902)

## Proposed changes

[fix](mtmv)Fix mtmv name to resolve conflicts
---
 .../mv/dimension_2_join_agg/dimension_join_agg_negative.groovy  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/regression-test/suites/nereids_rules_p0/mv/dimension_2_join_agg/dimension_join_agg_negative.groovy
 
b/regression-test/suites/nereids_rules_p0/mv/dimension_2_join_agg/dimension_join_agg_negative.groovy
index 14f527344b0..995e070d4b8 100644
--- 
a/regression-test/suites/nereids_rules_p0/mv/dimension_2_join_agg/dimension_join_agg_negative.groovy
+++ 
b/regression-test/suites/nereids_rules_p0/mv/dimension_2_join_agg/dimension_join_agg_negative.groovy
@@ -463,7 +463,7 @@ suite("dimension_join_agg_negative") {
 for (int i = 0; i < sql_list.size(); i++) {
 logger.info("sql_list current index: " + (i + 1))
 
-def mv_name = "mv_" + (i + 1)
+def mv_name = "mv_negative_" + (i + 1)
 
 create_all_mv(mv_name, sql_list[i])
 def job_name = getJobName(db, mv_name)





(doris) 35/44: [opt](split) add max wait time of getting splits (#36843)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 6563f258ee9155cd9bc5b9c8ba40267eadf62891
Author: Ashin Gau 
AuthorDate: Mon Jul 1 22:05:11 2024 +0800

[opt](split) add max wait time of getting splits (#36843)

Add session variable `fetch_splits_max_wait_time` to control the maximum
time spent waiting for splits and prevent long stalls.
---
 be/src/pipeline/exec/file_scan_operator.cpp|  3 ++-
 be/src/vec/exec/scan/split_source_connector.cpp|  2 +-
 be/src/vec/exec/scan/split_source_connector.h  | 15 +++
 be/src/vec/exec/scan/vfile_scanner.cpp |  2 --
 be/src/vec/exec/scan/vfile_scanner.h   |  1 -
 .../org/apache/doris/datasource/FileQueryScanNode.java |  3 ++-
 .../java/org/apache/doris/datasource/SplitSource.java  | 18 ++
 .../main/java/org/apache/doris/qe/SessionVariable.java | 17 +
 8 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/be/src/pipeline/exec/file_scan_operator.cpp 
b/be/src/pipeline/exec/file_scan_operator.cpp
index 9e636d0331f..98cc91824f6 100644
--- a/be/src/pipeline/exec/file_scan_operator.cpp
+++ b/be/src/pipeline/exec/file_scan_operator.cpp
@@ -71,8 +71,9 @@ void FileScanLocalState::set_scan_ranges(RuntimeState* state,
 auto scan_range = 
scan_ranges[0].scan_range.ext_scan_range.file_scan_range;
 if (scan_range.__isset.split_source) {
 auto split_source = scan_range.split_source;
+RuntimeProfile::Counter* get_split_timer = 
ADD_TIMER(_runtime_profile, "GetSplitTime");
 _split_source = 
std::make_shared<vectorized::RemoteSplitSourceConnector>(
-state, split_source.split_source_id, 
split_source.num_splits);
+state, get_split_timer, split_source.split_source_id, 
split_source.num_splits);
 }
 }
 if (_split_source == nullptr) {
diff --git a/be/src/vec/exec/scan/split_source_connector.cpp 
b/be/src/vec/exec/scan/split_source_connector.cpp
index fae65543e53..9bba44b4e76 100644
--- a/be/src/vec/exec/scan/split_source_connector.cpp
+++ b/be/src/vec/exec/scan/split_source_connector.cpp
@@ -45,7 +45,7 @@ Status RemoteSplitSourceConnector::get_next(bool* has_next, 
TFileRangeDesc* rang
 std::lock_guard l(_range_lock);
 *has_next = false;
 if (_scan_index == _scan_ranges.size() && !_last_batch) {
-SCOPED_RAW_TIMER(&_get_split_timer);
+SCOPED_TIMER(_get_split_timer);
 Status coord_status;
 FrontendServiceConnection 
coord(_state->exec_env()->frontend_client_cache(),
 _state->get_query_ctx()->coord_addr, 
&coord_status);
diff --git a/be/src/vec/exec/scan/split_source_connector.h 
b/be/src/vec/exec/scan/split_source_connector.h
index bfda961df34..f62b45612bf 100644
--- a/be/src/vec/exec/scan/split_source_connector.h
+++ b/be/src/vec/exec/scan/split_source_connector.h
@@ -43,8 +43,6 @@ public:
 virtual int num_scan_ranges() = 0;
 
 virtual TFileScanRangeParams* get_params() = 0;
-
-virtual int64_t get_split_time() { return 0; }
 };
 
 /**
@@ -89,6 +87,7 @@ class RemoteSplitSourceConnector : public 
SplitSourceConnector {
 private:
 std::mutex _range_lock;
 RuntimeState* _state;
+RuntimeProfile::Counter* _get_split_timer;
 int64 _split_source_id;
 int _num_splits;
 
@@ -97,11 +96,13 @@ private:
 int _scan_index = 0;
 int _range_index = 0;
 
-int64_t _get_split_timer = 0;
-
 public:
-RemoteSplitSourceConnector(RuntimeState* state, int64 split_source_id, int 
num_splits)
-: _state(state), _split_source_id(split_source_id), 
_num_splits(num_splits) {}
+RemoteSplitSourceConnector(RuntimeState* state, RuntimeProfile::Counter* 
get_split_timer,
+   int64 split_source_id, int num_splits)
+: _state(state),
+  _get_split_timer(get_split_timer),
+  _split_source_id(split_source_id),
+  _num_splits(num_splits) {}
 
 Status get_next(bool* has_next, TFileRangeDesc* range) override;
 
@@ -114,8 +115,6 @@ public:
 TFileScanRangeParams* get_params() override {
 LOG(FATAL) << "Unreachable, params is got by 
file_scan_range_params_map";
 }
-
-int64_t get_split_time() override { return _get_split_timer; }
 };
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp 
b/be/src/vec/exec/scan/vfile_scanner.cpp
index 4932e164649..f6f029b9de0 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -138,7 +138,6 @@ Status VFileScanner::prepare(
 _file_counter = ADD_COUNTER(_local_state->scanner_profile(), "FileNumber", 
TUnit::UNIT);
 _has_fully_rf_file_counter =
 ADD_COUNTER(_local_state->scanner_profile(), 
"HasFullyRfFileNumber", TUnit::UNIT);
-_get_spl

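The idea behind `fetch_splits_max_wait_time` can be sketched independently of the Doris classes: wait for more splits only up to a deadline, then return a partial batch instead of blocking for the full one. All names below are hypothetical:

```java
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

public class SplitWaitDemo {
    // Bounded wait: collect up to maxBatch splits, but never wait past maxWaitMs.
    static List<String> fetchSplits(BlockingQueue<String> source, int maxBatch, long maxWaitMs)
            throws InterruptedException {
        List<String> batch = new ArrayList<>();
        long deadline = System.currentTimeMillis() + maxWaitMs;
        while (batch.size() < maxBatch) {
            long remaining = deadline - System.currentTimeMillis();
            if (remaining <= 0) {
                break; // deadline reached: return whatever has arrived
            }
            String split = source.poll(remaining, TimeUnit.MILLISECONDS);
            if (split == null) {
                break; // timed out waiting for the next split
            }
            batch.add(split);
        }
        return batch;
    }

    public static void main(String[] args) throws InterruptedException {
        BlockingQueue<String> source = new LinkedBlockingQueue<>();
        source.add("split-1");
        source.add("split-2");
        // Only two splits are ready; with a 100 ms budget we return them promptly
        // rather than blocking for the full batch of 8.
        System.out.println(fetchSplits(source, 8, 100)); // [split-1, split-2]
    }
}
```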
(doris) 34/44: [fix](function) fix nereids fold constant wrong result of abs (#37065)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit e1c8d50b2fda98928bff2664e7fe30ce7b8fb08b
Author: zclllyybb 
AuthorDate: Mon Jul 1 21:19:01 2024 +0800

[fix](function) fix nereids fold constant wrong result of abs (#37065)

## Proposed changes

Issue Number: close #xxx

before:
```sql
mysql [optest]>select abs(cast(-9223372036854775808  as BIGINT));
+---+
| abs(cast(-9223372036854775808 as BIGINT)) |
+---+
| -9223372036854775808  |
+---+
1 row in set (0.00 sec)
```

now:
```sql
mysql [optest]>select abs(cast(-9223372036854775808  as BIGINT));
+---+
| abs(cast(-9223372036854775808 as BIGINT)) |
+---+
| 9223372036854775808   |
+---+
1 row in set (0.01 sec)
```
---
 .../functions/executable/ExecutableFunctions.java  |  8 +--
 .../functions/ExecutableFunctionsTest.java | 64 ++
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/ExecutableFunctions.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/ExecutableFunctions.java
index aad56942f37..2e84542fd04 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/ExecutableFunctions.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/ExecutableFunctions.java
@@ -47,22 +47,22 @@ public class ExecutableFunctions {
  */
 @ExecFunction(name = "abs", argTypes = {"TINYINT"}, returnType = 
"SMALLINT")
 public static Expression abs(TinyIntLiteral literal) {
-return new SmallIntLiteral((byte) Math.abs(literal.getValue()));
+return new SmallIntLiteral((short) Math.abs(literal.getValue()));
 }
 
 @ExecFunction(name = "abs", argTypes = {"SMALLINT"}, returnType = "INT")
 public static Expression abs(SmallIntLiteral literal) {
-return new IntegerLiteral((short) Math.abs(literal.getValue()));
+return new IntegerLiteral(Math.abs(literal.getValue()));
 }
 
 @ExecFunction(name = "abs", argTypes = {"INT"}, returnType = "BIGINT")
 public static Expression abs(IntegerLiteral literal) {
-return new BigIntLiteral(Math.abs(literal.getValue()));
+return new BigIntLiteral(Math.abs((long) literal.getValue()));
 }
 
 @ExecFunction(name = "abs", argTypes = {"BIGINT"}, returnType = "LARGEINT")
 public static Expression abs(BigIntLiteral literal) {
-return new LargeIntLiteral(new 
BigInteger(Long.toString(Math.abs(literal.getValue();
+return new 
LargeIntLiteral(BigInteger.valueOf(literal.getValue()).abs());
 }
 
 @ExecFunction(name = "abs", argTypes = {"LARGEINT"}, returnType = 
"LARGEINT")
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ExecutableFunctionsTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ExecutableFunctionsTest.java
new file mode 100644
index 000..6c2e9f144be
--- /dev/null
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/ExecutableFunctionsTest.java
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions;
+
+import 
org.apache.doris.nereids.trees.expressions.functions.executable.ExecutableFunctions;
+import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.LargeIntLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.SmallIntLiteral;
+import org.apache.doris.

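The overflow behind this fix is reproducible in plain Java: the most negative long has no positive counterpart, so Math.abs returns it unchanged, while widening first (as the fix does for every integer width, with BIGINT widened to LARGEINT via BigInteger) gives the correct value:

```java
import java.math.BigInteger;

public class AbsOverflowDemo {
    public static void main(String[] args) {
        long min = Long.MIN_VALUE; // -9223372036854775808
        // +9223372036854775808 does not fit in a long, so Math.abs
        // returns the argument unchanged, still negative.
        System.out.println(Math.abs(min));                 // -9223372036854775808
        // Widening before taking the absolute value is always safe.
        System.out.println(BigInteger.valueOf(min).abs()); // 9223372036854775808
    }
}
```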
(doris) 40/44: [fix](routine-load) fix auto resume invalid when FE leader change (#37071)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit a7ba1b0eef8af78aff99d1aa48f0296316f9f15e
Author: hui lai <1353307...@qq.com>
AuthorDate: Tue Jul 2 09:33:49 2024 +0800

[fix](routine-load) fix auto resume invalid when FE leader change (#37071)

We have seen a routine load job get paused and never auto-resume even
though it met the conditions.
```
  Id: 134305
Name: 
lineitem_balance_dup_persistent_weekly_persistent_flow_weekly
  CreateTime: 2024-06-27 19:54:13
   PauseTime: 2024-06-28 23:02:46
 EndTime: NULL
  DbName: regression_test_stress_load_long_duration_load
   TableName: lineitem_balance_dup_persistent_weekly
IsMultiTable: false
   State: PAUSED
  DataSourceType: KAFKA
  CurrentTaskNum: 0
   JobProperties: 
{"max_batch_rows":"55","timezone":"Asia/Shanghai","send_batch_parallelism":"1","load_to_single_tablet":"false","column_separator":"','","line_delimiter":"\n","current_concurrent_number":"0","delete":"*","partial_columns":"false","merge_type":"APPEND","exec_mem_limit":"2147483648","strict_mode":"false","jsonpaths":"","max_batch_interval":"10","max_batch_size":"409715200","fuzzy_parse":"false","partitions":"*","columnToColumnExpr":"","whereExpr":"*","desired_co
 [...]
DataSourceProperties: 
{"topic":"test-topic-persistent-weekly-new","currentKafkaPartitions":"","brokerList":"xxx"}
CustomProperties: 
{"kafka_default_offsets":"OFFSET_BEGINNING","group.id":"test-consumer-group","client.id":"test-client-id"}
   Statistic: 
{"receivedBytes":2234836231654,"runningTxns":[],"errorRows":0,"committedTaskNum":1019074,"loadedRows":11693905636,"loadRowsRate":119675,"abortedTaskNum":13556,"errorRowsAfterResumed":0,"totalRows":11693905636,"unselectedRows":0,"receivedBytesRate":22871277,"taskExecuteTimeMs":97713660}
Progress: 
{"0":"81666390","1":"81605244","2":"80934894","3":"81531594","4":"81866067","5":"80841194","6":"81229045","7":"80854534","8":"81305844","9":"81384530","10":"81016926","11":"81018762","12":"81586996","13":"81028852","14":"80836728","15":"81536307","16":"81191324","17":"80790892","18":"81518108","19":"80853947","20":"80944134","21":"81567859","22":"80967795","23":"80962887","24":"81444757","25":"81182803","26":"81081053","27":"81374984","28":"81089548","29":"811612
 [...]
 Lag: 
{"0":-1,"1":-1,"2":-1,"3":-1,"4":-1,"5":-1,"6":-1,"7":-1,"8":-1,"9":-1,"10":-1,"11":-1,"12":-1,"13":-1,"14":-1,"15":-1,"16":-1,"17":-1,"18":-1,"19":-1,"20":-1,"21":-1,"22":-1,"23":-1,"24":-1,"25":-1,"26":-1,"27":-1,"28":-1,"29":-1,"30":-1,"31":-1,"32":-1,"33":-1,"34":-1,"35":-1,"36":-1,"37":-1,"38":-1,"39":-1,"40":-1,"41":-1,"42":-1,"43":-1,"44":-1,"45":-1,"46":-1,"47":-1,"48":-1,"49":-1,"50":-1,"51":-1,"52":-1,"53":-1,"54":-1,"55":-1,"56":-1,"57":-1,"58":-1,"59"
 [...]
ReasonOfStateChanged:
ErrorLogUrls:
OtherMsg:
User: root
 Comment:
```


If a routine load job is paused and the FE leader changes at the same
time, pauseReason will be null on the new leader (it was never
persisted), so the auto resume logic will never be triggered:
```
if (jobRoutine.pauseReason != null
&& jobRoutine.pauseReason.getCode() != 
InternalErrorCode.MANUAL_PAUSE_ERR
&& jobRoutine.pauseReason.getCode() != 
InternalErrorCode.TOO_MANY_FAILURE_ROWS_ERR
&& jobRoutine.pauseReason.getCode() != 
InternalErrorCode.CANNOT_RESUME_ERR) {
```
---
 .../main/java/org/apache/doris/load/routineload/RoutineLoadJob.java  | 5 +
 1 file changed, 5 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
 
b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
index d870922e2d7..51f31837780 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
@@ -242,11 +242,16 @@ public abstract class RoutineLoadJob
 @SerializedName("pg")
 protected RoutineLoadProgress progress;
 
+@SerializedName("lrt")
 protected long latestResumeTimestamp; // the latest resume time
+@SerializedName("art")
 protected long autoResumeCount;
 // some other msg which need to show to user;
+@SerializedName("om")
 protected String otherMsg = "";
+@SerializedName("pr")
 protected ErrorReason pauseReason;
+@SerializedName("cr")
 protected ErrorReason cancelReason;
 
 @SerializedName("cts")


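Why the missing annotations caused the bug: a minimal sketch, assuming (as Doris's metadata persistence does) that only fields carrying @SerializedName survive the serialized image. Class and field names here are illustrative:

```java
import com.google.gson.ExclusionStrategy;
import com.google.gson.FieldAttributes;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.annotations.SerializedName;

public class PersistSkipDemo {
    static class Job {
        @SerializedName("pg")
        String progress;
        String pauseReason; // no @SerializedName: dropped from the image
    }

    public static void main(String[] args) {
        // Model "only annotated fields are persisted" with an exclusion strategy.
        Gson gson = new GsonBuilder().setExclusionStrategies(new ExclusionStrategy() {
            @Override
            public boolean shouldSkipField(FieldAttributes f) {
                return f.getAnnotation(SerializedName.class) == null;
            }

            @Override
            public boolean shouldSkipClass(Class<?> clazz) {
                return false;
            }
        }).create();

        Job job = new Job();
        job.progress = "{\"0\":\"81666390\"}";
        job.pauseReason = "TOO_MANY_FAILURE_ROWS";

        String image = gson.toJson(job);               // pauseReason is skipped
        Job replayed = gson.fromJson(image, Job.class);
        System.out.println(replayed.pauseReason);      // null: auto resume never fires
    }
}
```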

(doris) 36/44: [test](tvf) move p2 tvf tests from p2 to p0 (#37081)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 6c602c803c9e43d61214b8b0de8ae991b75a89dc
Author: Tiewei Fang <43782773+bepppo...@users.noreply.github.com>
AuthorDate: Mon Jul 1 22:10:22 2024 +0800

[test](tvf) move p2 tvf tests from p2 to p0 (#37081)

Move the tests `test_tvf_view` and `test_tvf_view_count` from
external_table_p2 to external_table_p0,
so that developers can run them locally in the Docker environment.
---
 .../data/external_table_p0/tvf/test_tvf_view.out   | 28 ++
 .../external_table_p2/tvf/test_tvf_view_p2.out | 28 --
 .../tvf/test_tvf_view.groovy}  | 12 +-
 .../tvf/test_tvf_view_count.groovy}| 10 
 4 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/regression-test/data/external_table_p0/tvf/test_tvf_view.out 
b/regression-test/data/external_table_p0/tvf/test_tvf_view.out
new file mode 100644
index 000..ddf5113bbf1
--- /dev/null
+++ b/regression-test/data/external_table_p0/tvf/test_tvf_view.out
@@ -0,0 +1,28 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !1 --
+100490
+
+-- !2 --
+1  goldenrod lavender spring chocolate laceManufacturer#1  
Brand#13PROMO BURNISHED COPPER  7   JUMBO PKG   901.00  ly. 
slyly ironi
+2  blush thistle blue yellow saddleManufacturer#1  Brand#13
LARGE BRUSHED BRASS 1   LG CASE 902.00  lar accounts amo
+3  spring green yellow purple cornsilk Manufacturer#4  Brand#42
STANDARD POLISHED BRASS 21  WRAP CASE   903.00  egular deposits hag
+4  cornflower chocolate smoke green pink   Manufacturer#3  Brand#34
SMALL PLATED BRASS  14  MED DRUM904.00  p furiously r
+5  forest brown coral puff cream   Manufacturer#3  Brand#32
STANDARD POLISHED TIN   15  SM PKG  905.00   wake carefully 
+6  bisque cornflower lawn forest magenta   Manufacturer#2  Brand#24
PROMO PLATED STEEL  4   MED BAG 906.00  sual a
+7  moccasin green thistle khaki floral Manufacturer#1  Brand#11
SMALL PLATED COPPER 45  SM BAG  907.00  lyly. ex
+8  misty lace thistle snow royal   Manufacturer#4  Brand#44PROMO 
BURNISHED TIN 41  LG DRUM 908.00  eposi
+9  thistle dim navajo dark gainsboro   Manufacturer#4  Brand#43
SMALL BURNISHED STEEL   12  WRAP CASE   909.00  ironic foxe
+10 linen pink saddle puff powder   Manufacturer#5  Brand#54LARGE 
BURNISHED STEEL   44  LG CAN  910.01  ithely final deposit
+
+-- !3 --
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+
diff --git a/regression-test/data/external_table_p2/tvf/test_tvf_view_p2.out 
b/regression-test/data/external_table_p2/tvf/test_tvf_view_p2.out
deleted file mode 100644
index 02304fb57ab..000
--- a/regression-test/data/external_table_p2/tvf/test_tvf_view_p2.out
+++ /dev/null
@@ -1,28 +0,0 @@
--- This file is automatically generated. You should know what you did if you 
want to edit this
--- !1 --
-852910
-
--- !2 --
-199147091  plum blush violet orange bisque Manufacturer#5  Brand#51
MEDIUM ANODIZED NICKEL  28  SM DRUM 1128.14 nding, final decoy
-199147092  brown tan chocolate moccasin peru   Manufacturer#4  
Brand#44STANDARD BRUSHED COPPER 40  JUMBO PKG   1129.14 ully 
even acc
-199147093  white sandy burlywood orange powder Manufacturer#2  
Brand#23MEDIUM PLATED COPPER15  MED PACK1130.14 
furiously special
-199147094  cyan almond olive steel navajo  Manufacturer#1  Brand#15
ECONOMY BRUSHED STEEL   12  WRAP PACK   1131.14 dolites.
-199147095  linen moccasin snow deep dimManufacturer#2  Brand#22
STANDARD POLISHED TIN   37  LG CASE 1132.14  furious
-199147096  dim violet ivory cream drab Manufacturer#4  Brand#44
MEDIUM ANODIZED COPPER  20  JUMBO CAN   1133.14 ions. sometime
-199147097  steel khaki smoke beige sienna  Manufacturer#2  Brand#21
STANDARD BRUSHED BRASS  36  WRAP CASE   1134.14 und the blithely iron
-199147098  cornsilk red brown cyan moccasinManufacturer#4  
Brand#43MEDIUM ANODIZED TIN 12  SM BOX  1135.14 hely across the
-199147099  slate wheat sienna almond springManufacturer#2  
Brand#25LARGE BURNISHED TIN 1   SM CAN  1136.14 uriously ironic 
packag
-199147100  orange gainsboro chocolate ivory grey   Manufacturer#4  
Brand#45PROMO POLISHED BRASS42  MED DRUM1137.15 sual req
-
--- !3 --
-199147091
-199147092
-199147093
-199147094
-199147095
-199147096
-199147097
-199147098
-199147099
-199147100
-
diff --git 
a/regression-test/suites/external_table_p2/tvf/test_tvf_view_p2.groovy 
b/regressi

(doris) 22/44: [test](migrate) move 2 cases from p2 to p0 (#37004)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 4bd260a7dc22e24defaeb1b126adcbeddf864ffd
Author: wuwenchi 
AuthorDate: Mon Jul 1 15:23:11 2024 +0800

[test](migrate) move 2 cases from p2 to p0 (#37004)

related: #36787

- test_text_garbled_file
- test_hive_statistic_timeout
- test_hive_to_array
- test_hive_statistic
- test_hive_default_partition
---
 .../hive_textfile_array_all_types/create_table.hql |  27 ++
 .../hive_textfile_array_all_types/data.tar.gz  | Bin 0 -> 625 bytes
 .../hive_textfile_array_all_types/run.sh   |  13 +
 .../hive_textfile_array_delimiter/create_table.hql |  32 ++
 .../hive_textfile_array_delimiter/data.tar.gz  | Bin 0 -> 690 bytes
 .../hive_textfile_array_delimiter/run.sh   |  13 +
 .../hive_textfile_nestedarray/create_table.hql |  16 +
 .../hive_textfile_nestedarray/data.tar.gz  | Bin 0 -> 280 bytes
 .../multi_catalog/hive_textfile_nestedarray/run.sh |  13 +
 .../multi_catalog/logs1_parquet/create_table.hql   |  39 +++
 .../data/multi_catalog/logs1_parquet/run.sh|  22 ++
 .../multi_catalog/one_partition/create_table.hql   |  22 ++
 .../data/multi_catalog/one_partition/data.tar.gz   | Bin 0 -> 296 bytes
 .../data/multi_catalog/one_partition/run.sh|  13 +
 .../test_csv_format_error/create_table.hql |  68 
 .../test_csv_format_error/data.tar.gz  | Bin 0 -> 151583 bytes
 .../multi_catalog/test_csv_format_error/run.sh |  13 +
 .../test_date_string_partition/create_table.hql|  25 ++
 .../test_date_string_partition/data.tar.gz | Bin 0 -> 353 bytes
 .../test_date_string_partition/run.sh  |  13 +
 .../multi_catalog/two_partition/create_table.hql   |  25 ++
 .../data/multi_catalog/two_partition/data.tar.gz   | Bin 0 -> 375 bytes
 .../data/multi_catalog/two_partition/run.sh|  13 +
 .../data/statistics/statistics/create_table.hql|  33 ++
 .../scripts/data/statistics/statistics/data.tar.gz | Bin 0 -> 3956 bytes
 .../hive/scripts/data/statistics/statistics/run.sh |  13 +
 .../data/tpch_1000_parquet/part/create_table.hql   |  24 ++
 .../scripts/data/tpch_1000_parquet/part/run.sh |  22 ++
 .../hive/test_hive_default_partition.out   | 174 +++
 .../hive/test_hive_to_array.out|  21 ++
 .../hive/test_text_garbled_file.out| Bin 296830 -> 593565 bytes
 .../hive/test_hive_default_partition.groovy|  17 +-
 .../hive/test_hive_statistic.groovy| 344 +
 .../hive/test_hive_statistic_timeout.groovy|  23 +-
 .../hive/test_hive_to_array.groovy |  17 +-
 .../hive/test_text_garbled_file.groovy |  47 +++
 .../hive/test_hive_statistic.groovy| 338 
 .../hive/test_text_garbled_file.groovy |  46 ---
 38 files changed, 1084 insertions(+), 402 deletions(-)

diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/create_table.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/create_table.hql
new file mode 100644
index 000..6b700396838
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/create_table.hql
@@ -0,0 +1,27 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+CREATE TABLE IF NOT EXISTS `hive_textfile_array_all_types`(
+  `col1` array,
+  `col2` array,
+  `col3` array,
+  `col4` array,
+  `col5` array,
+  `col6` array,
+  `col7` array,
+  `col8` array,
+  `col9` array,
+  `col10` array,
+  `col11` array,
+  `col12` array,
+  `col13` array)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  '/user/doris/suites/multi_catalog/hive_textfile_array_all_types';
+
+msck repair table hive_textfile_array_all_types;
\ No newline at end of file
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/data.tar.gz
 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/data.tar.gz
new file mode 100644
index 000..b9d64ab29bc
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/data.tar.gz
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/run.sh
 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/run.sh
new file mode 100755
index 000..7c2e7e7aed2
--- /dev/null
+++ 
b/docker/thirdparti

(doris) 39/44: [fix](regression) fix txn_insert case (#36892)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit e5d7eea453ddfc1d708b821f1123606b4845f5ca
Author: meiyi 
AuthorDate: Tue Jul 2 09:22:37 2024 +0800

[fix](regression) fix txn_insert case (#36892)

## Proposed changes

On a merge-on-write table the delete predicate is implemented via
insert, so within a txn insert a delete is actually an insert operation;
this changes the commit order, and therefore the result.
---
 regression-test/data/insert_p0/txn_insert.out |  2 --
 regression-test/suites/insert_p0/txn_insert.groovy|  2 +-
 .../txn_insert_concurrent_insert_duplicate.groovy | 13 +++--
 .../insert_p2/txn_insert_concurrent_insert_ud.groovy  | 15 +--
 .../insert_p2/txn_insert_concurrent_insert_unique.groovy  | 15 +--
 .../insert_p2/txn_insert_concurrent_insert_update.groovy  | 15 +--
 .../suites/insert_p2/txn_insert_with_schema_change.groovy |  1 +
 7 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/regression-test/data/insert_p0/txn_insert.out 
b/regression-test/data/insert_p0/txn_insert.out
index 257bdcc0311..c1278f7fe2e 100644
--- a/regression-test/data/insert_p0/txn_insert.out
+++ b/regression-test/data/insert_p0/txn_insert.out
@@ -1211,14 +1211,12 @@
 5  1   15
 
 -- !select_cu2 --
-1  2   121
 2  2   122
 3  \N  30
 4  4   14
 5  \N  15
 
 -- !select_cu3 --
-1  2   111
 2  2   112
 3  \N  130
 4  4   14
diff --git a/regression-test/suites/insert_p0/txn_insert.groovy 
b/regression-test/suites/insert_p0/txn_insert.groovy
index 66b7be4e659..c403f4c132c 100644
--- a/regression-test/suites/insert_p0/txn_insert.groovy
+++ b/regression-test/suites/insert_p0/txn_insert.groovy
@@ -739,10 +739,10 @@ suite("txn_insert") {
 sql """ delete from ${unique_table}_2 where id <= 1; """
 sql """ commit """
 
-sql """ delete from ${unique_table}_3 where id <= 1; """
 sql """ insert into ${unique_table}_3(id, score) select id, 
score from ${unique_table}_0; """
 sql """ insert into ${unique_table}_3(id, score) select id, 
score from ${unique_table}_1; """
 sql """ update ${unique_table}_3 set score = score + 100 where 
id in (select id from ${unique_table}_0); """
+sql """ delete from ${unique_table}_3 where id <= 1; """
 } catch (Throwable e) {
 logger.warn("column update failed", e)
 assertTrue(false)
diff --git 
a/regression-test/suites/insert_p2/txn_insert_concurrent_insert_duplicate.groovy
 
b/regression-test/suites/insert_p2/txn_insert_concurrent_insert_duplicate.groovy
index 493b85064a5..048a07fb817 100644
--- 
a/regression-test/suites/insert_p2/txn_insert_concurrent_insert_duplicate.groovy
+++ 
b/regression-test/suites/insert_p2/txn_insert_concurrent_insert_duplicate.groovy
@@ -148,14 +148,13 @@ suite("txn_insert_concurrent_insert_duplicate") {
 CompletableFuture.allOf(futuresArray).get(30, TimeUnit.MINUTES)
 sql """ sync """
 
-logger.info("errors: " + errors)
+logger.info("error num: " + errors.size() + ", errors: " + errors)
 
 def result = sql """ select count() from ${tableName}_0 """
-logger.info("result: ${result}, expected: ${6001215 * threadNum}")
-assertTrue(result[0][0] >= 6001215 * threadNum)
-result = sql """ select count() from ${tableName}_1 """
-logger.info("result: ${result}")
-assertEquals(2999666 * threadNum, result[0][0])
+logger.info("${tableName}_0 row count: ${result}, expected: ${6001215 * 
threadNum}")
+
+def result2 = sql """ select count() from ${tableName}_1 """
+logger.info("${tableName}_1 row count: ${result2}, expected: ${2999666 * 
threadNum}")
 
 def tables = sql """ show tables from $dbName """
 logger.info("tables: $tables")
@@ -166,5 +165,7 @@ suite("txn_insert_concurrent_insert_duplicate") {
 }
 }
 
+assertTrue(result[0][0] >= 6001215 * threadNum)
+assertEquals(2999666 * threadNum, result2[0][0])
 assertEquals(0, errors.size())
 }
diff --git 
a/regression-test/suites/insert_p2/txn_insert_concurrent_insert_ud.groovy 
b/regression-test/suites/insert_p2/txn_insert_concurrent_insert_ud.groovy
index 555f4b3511a..787c556dc3d 100644
--- a/regression-test/suites/insert_p2/txn_insert_concurrent_insert_ud.groovy
+++ b/regression-test/suites/insert_p2/txn_insert_concurrent_insert_ud.groovy
@@ -161,14 +161,15 @@ suite("txn_insert_concurrent_insert_ud") {
 CompletableFuture.allOf(futuresArray).get(30, TimeUnit.MINUTES)
 sql """ sync """
 
-logger.info("errors: " + errors)
+logger.info("error num: " + errors.size() + ", errors: " + errors)
 
+def t0_row_count = 2000495 // 5000226 or 6001215
 def result = sql """ select 

(doris) 01/44: [Fix](autoinc) Handle the processing of auto_increment column on exchange node rather than on TabletWriter when using `TABLET_SINK_SHUFFLE_PARTITIONED` (#36836)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit ee164dcb86534fcf2f7d3d3794f84145b1379000
Author: bobhan1 
AuthorDate: Sun Jun 30 13:07:27 2024 +0800

[Fix](autoinc) Handle the processing of auto_increment column on exchange 
node rather than on TabletWriter when using `TABLET_SINK_SHUFFLE_PARTITIONED` 
(#36836)

## Proposed changes

Issue Number: close #36638

https://github.com/apache/doris/pull/30914 added partition tablet sink
shuffle, and the processing of the auto_increment column should be
handled on the exchange node rather than on the TabletWriter when using
partition tablet sink shuffle.

branch-2.1-pick: https://github.com/apache/doris/pull/37029
---
 be/src/pipeline/exec/exchange_sink_operator.cpp|  5 +-
 be/src/vec/sink/vtablet_block_convertor.cpp|  5 +-
 be/src/vec/sink/writer/vtablet_writer.cpp  |  2 +
 be/src/vec/sink/writer/vtablet_writer_v2.cpp   |  2 +
 .../unique/test_unique_table_auto_inc.out  | 62 ++
 .../unique/test_unique_table_auto_inc.groovy   | 38 +
 6 files changed, 110 insertions(+), 4 deletions(-)

diff --git a/be/src/pipeline/exec/exchange_sink_operator.cpp 
b/be/src/pipeline/exec/exchange_sink_operator.cpp
index 0ccded0b825..198bc555024 100644
--- a/be/src/pipeline/exec/exchange_sink_operator.cpp
+++ b/be/src/pipeline/exec/exchange_sink_operator.cpp
@@ -193,9 +193,12 @@ Status ExchangeSinkLocalState::open(RuntimeState* state) {
 
std::make_unique<vectorized::OlapTabletFinder>(_vpartition.get(), 
find_tablet_mode);
 _tablet_sink_tuple_desc = 
_state->desc_tbl().get_tuple_descriptor(p._tablet_sink_tuple_id);
 _tablet_sink_row_desc = p._pool->add(new 
RowDescriptor(_tablet_sink_tuple_desc, false));
-//_block_convertor no need init_autoinc_info here
+// if _part_type == TPartitionType::TABLET_SINK_SHUFFLE_PARTITIONED, 
we handle the processing of auto_increment column
+// on exchange node rather than on TabletWriter
 _block_convertor =
 
std::make_unique(_tablet_sink_tuple_desc);
+_block_convertor->init_autoinc_info(_schema->db_id(), 
_schema->table_id(),
+_state->batch_size());
 _location = p._pool->add(new 
OlapTableLocationParam(p._tablet_sink_location));
 _row_distribution.init(
 {.state = _state,
diff --git a/be/src/vec/sink/vtablet_block_convertor.cpp 
b/be/src/vec/sink/vtablet_block_convertor.cpp
index 7f7f4c76008..feb6633511e 100644
--- a/be/src/vec/sink/vtablet_block_convertor.cpp
+++ b/be/src/vec/sink/vtablet_block_convertor.cpp
@@ -505,8 +505,7 @@ Status 
OlapTableBlockConvertor::_fill_auto_inc_cols(vectorized::Block* block, si
 vectorized::ColumnInt64::Container& dst_values = dst_column->get_data();
 
 vectorized::ColumnPtr src_column_ptr = block->get_by_position(idx).column;
-if (const vectorized::ColumnConst* const_column =
-check_and_get_column(src_column_ptr)) 
{
+if (const auto* const_column = 
check_and_get_column(src_column_ptr)) {
 // for insert stmt like "insert into tbl1 select null,col1,col2,... 
from tbl2" or
 // "insert into tbl1 select 1,col1,col2,... from tbl2", the type of 
literal's column
 // will be `ColumnConst`
@@ -529,7 +528,7 @@ Status 
OlapTableBlockConvertor::_fill_auto_inc_cols(vectorized::Block* block, si
 int64_t value = const_column->get_int(0);
 dst_values.resize_fill(rows, value);
 }
-} else if (const vectorized::ColumnNullable* src_nullable_column =
+} else if (const auto* src_nullable_column =

check_and_get_column<vectorized::ColumnNullable>(src_column_ptr)) {
 auto src_nested_column_ptr = 
src_nullable_column->get_nested_column_ptr();
 const auto& null_map_data = src_nullable_column->get_null_map_data();
diff --git a/be/src/vec/sink/writer/vtablet_writer.cpp 
b/be/src/vec/sink/writer/vtablet_writer.cpp
index 5d36ca37805..6d388a7d958 100644
--- a/be/src/vec/sink/writer/vtablet_writer.cpp
+++ b/be/src/vec/sink/writer/vtablet_writer.cpp
@@ -1197,6 +1197,8 @@ Status VTabletWriter::_init(RuntimeState* state, 
RuntimeProfile* profile) {
 }
 
 _block_convertor = 
std::make_unique(_output_tuple_desc);
+// if partition_type is TABLET_SINK_SHUFFLE_PARTITIONED, we handle the 
processing of auto_increment column
+// on exchange node rather than on TabletWriter
 _block_convertor->init_autoinc_info(
 _schema->db_id(), _schema->table_id(), _state->batch_size(),
 _schema->is_partial_update() && 
!_schema->auto_increment_coulumn().empty(),
diff --git a/be/src/vec/sink/writer/vtablet_writer_v2.cpp 
b/be/src/vec/sink/writer/vtablet_writer_v2.cpp
index 9bd154ce212..fbefd7a6d83 100644
--- a/be/src/vec/sink/writer/vtablet_writer_v2.cpp
+++ b/be

(doris) 02/44: [streamload](2pc) Fix 2pc stream load txn in cloud mode (#37033)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 8be0ce4800705d2d827fe625654cd8a07ed8f7d1
Author: Gavin Chou 
AuthorDate: Sun Jun 30 20:37:05 2024 +0800

[streamload](2pc) Fix 2pc stream load txn in cloud mode (#37033)

Aborting a load txn by label only must be forwarded to the FE master,
because the request carries no db id for the BE to resolve the label
locally. The routing rule is illustrated in the sketch below.
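
A minimal sketch of that dispatch rule, with illustrative names (this is not the actual Doris API): a txn identified by (db_id, label) or by txn_id can be aborted directly against the meta service, while a bare label must be forwarded to the FE master, which owns the label-to-db mapping.

```
#include <cstdint>
#include <string>

enum class AbortRoute { DIRECT_META_SERVICE, FORWARD_TO_FE_MASTER, REJECT };

AbortRoute choose_abort_route(int64_t db_id, int64_t txn_id, const std::string& label) {
    if (db_id > 0 && !label.empty()) return AbortRoute::DIRECT_META_SERVICE; // label + db id
    if (txn_id > 0) return AbortRoute::DIRECT_META_SERVICE;                  // txn id alone is enough
    if (!label.empty()) return AbortRoute::FORWARD_TO_FE_MASTER;             // label only: FE resolves db id
    return AbortRoute::REJECT;                                               // nothing identifies the txn
}
```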
---
 be/src/cloud/cloud_meta_mgr.cpp|  6 +-
 be/src/cloud/cloud_stream_load_executor.cpp| 69 --
 be/src/common/config.cpp   |  2 +-
 .../runtime/stream_load/stream_load_executor.cpp   | 42 ++---
 .../apache/doris/service/FrontendServiceImpl.java  |  2 +-
 .../load_p0/stream_load/test_stream_load.groovy|  1 +
 6 files changed, 89 insertions(+), 33 deletions(-)

diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp
index f0a377cba67..732f3023e91 100644
--- a/be/src/cloud/cloud_meta_mgr.cpp
+++ b/be/src/cloud/cloud_meta_mgr.cpp
@@ -839,8 +839,12 @@ Status CloudMetaMgr::abort_txn(const StreamLoadContext& ctx) {
     if (ctx.db_id > 0 && !ctx.label.empty()) {
         req.set_db_id(ctx.db_id);
         req.set_label(ctx.label);
-    } else {
+    } else if (ctx.txn_id > 0) {
         req.set_txn_id(ctx.txn_id);
+    } else {
+        LOG(WARNING) << "failed abort txn, with illegal input, db_id=" << ctx.db_id
+                     << " txn_id=" << ctx.txn_id << " label=" << ctx.label;
+        return Status::InternalError("failed to abort txn");
     }
     return retry_rpc("abort txn", req, &res, &MetaService_Stub::abort_txn);
 }
 }
diff --git a/be/src/cloud/cloud_stream_load_executor.cpp 
b/be/src/cloud/cloud_stream_load_executor.cpp
index b7d428e59a4..92fb73eacc1 100644
--- a/be/src/cloud/cloud_stream_load_executor.cpp
+++ b/be/src/cloud/cloud_stream_load_executor.cpp
@@ -26,6 +26,12 @@
 
 namespace doris {
 
+enum class TxnOpParamType : int {
+ILLEGAL,
+WITH_TXN_ID,
+WITH_LABEL,
+};
+
 CloudStreamLoadExecutor::CloudStreamLoadExecutor(ExecEnv* exec_env)
 : StreamLoadExecutor(exec_env) {}
 
@@ -42,13 +48,48 @@ Status CloudStreamLoadExecutor::pre_commit_txn(StreamLoadContext* ctx) {
 }
 
 Status CloudStreamLoadExecutor::operate_txn_2pc(StreamLoadContext* ctx) {
-    VLOG_DEBUG << "operate_txn_2pc, op: " << ctx->txn_operation;
+    std::stringstream ss;
+    ss << "db_id=" << ctx->db_id << " txn_id=" << ctx->txn_id << " label=" << ctx->label
+       << " txn_2pc_op=" << ctx->txn_operation;
+    std::string op_info = ss.str();
+    VLOG_DEBUG << "operate_txn_2pc " << op_info;
+    TxnOpParamType topt = ctx->txn_id > 0   ? TxnOpParamType::WITH_TXN_ID
+                          : !ctx->label.empty() ? TxnOpParamType::WITH_LABEL
+                                                : TxnOpParamType::ILLEGAL;
+
+    Status st = Status::InternalError("impossible branch reached, " + op_info);
+
     if (ctx->txn_operation.compare("commit") == 0) {
-        return _exec_env->storage_engine().to_cloud().meta_mgr().commit_txn(*ctx, true);
+        if (topt == TxnOpParamType::WITH_TXN_ID) {
+            VLOG_DEBUG << "2pc commit stream load txn directly: " << op_info;
+            st = _exec_env->storage_engine().to_cloud().meta_mgr().commit_txn(*ctx, true);
+        } else if (topt == TxnOpParamType::WITH_LABEL) {
+            VLOG_DEBUG << "2pc commit stream load txn with FE support: " << op_info;
+            st = StreamLoadExecutor::operate_txn_2pc(ctx);
+        } else {
+            st = Status::InternalError(
+                    "failed to 2pc commit txn, with TxnOpParamType::illegal input, " + op_info);
+        }
+    } else if (ctx->txn_operation.compare("abort") == 0) {
+        if (topt == TxnOpParamType::WITH_TXN_ID) {
+            LOG(INFO) << "2pc abort stream load txn directly: " << op_info;
+            st = _exec_env->storage_engine().to_cloud().meta_mgr().abort_txn(*ctx);
+            WARN_IF_ERROR(st, "failed to rollback txn " + op_info);
+        } else if (topt == TxnOpParamType::WITH_LABEL) { // maybe a label send to FE to abort
+            VLOG_DEBUG << "2pc abort stream load txn with FE support: " << op_info;
+            StreamLoadExecutor::rollback_txn(ctx);
+            st = Status::OK();
+        } else {
+            st = Status::InternalError("failed abort txn, with illegal input, " + op_info);
+        }
     } else {
-        // 2pc abort
-        return _exec_env->storage_engine().to_cloud().meta_mgr().abort_txn(*ctx);
+        std::string msg =
+                "failed to operate_txn_2pc, unrecognized operation: " + ctx->txn_operation;
+        LOG(WARNING) << msg << " " << op_info;
+        st = Status::InternalError(msg + " " + op_info);
     }
+    WARN_IF_ERROR(st, "failed to operate_txn_2pc " + op_info)
+    return st;
 }
 
 Status CloudStreamLoadExecutor::commit_txn(StreamLoadContext

(doris) 11/44: [regression-test](case) modify statistics table name (#36689)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 51bf9ab1328cc364b4b5643f0870eb56d8fc3ab6
Author: shuke <37901441+shuke...@users.noreply.github.com>
AuthorDate: Mon Jul 1 11:23:13 2024 +0800

[regression-test](case) modify statistics table name  (#36689)
---
 regression-test/suites/pipeline_p0/statitics_compaction.groovy | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/regression-test/suites/pipeline_p0/statitics_compaction.groovy 
b/regression-test/suites/pipeline_p0/statitics_compaction.groovy
index eaf723c763c..aebe55939d5 100644
--- a/regression-test/suites/pipeline_p0/statitics_compaction.groovy
+++ b/regression-test/suites/pipeline_p0/statitics_compaction.groovy
@@ -66,5 +66,5 @@ suite("statistic_table_compaction", "nonConcurrent,p0") {
 }
 
 do_compaction("__internal_schema.column_statistics")
-do_compaction("__internal_schema.histogram_statistics")
-}
\ No newline at end of file
+do_compaction("__internal_schema.partition_statistics")
+}





(doris) 06/44: [refactor](spill) unify the entry point of spill tasks (#37020)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 29cb612a4d65ddc01943a0f7af628da968c8b531
Author: Jerry Hu 
AuthorDate: Mon Jul 1 09:52:32 2024 +0800

[refactor](spill) unify the entry point of spill tasks (#37020)
---
 .../exec/partitioned_aggregation_sink_operator.cpp |  30 ++
 .../partitioned_aggregation_source_operator.cpp|  29 ++
 .../exec/partitioned_aggregation_source_operator.h |   1 -
 .../exec/partitioned_hash_join_probe_operator.cpp  |  76 +++---
 .../exec/partitioned_hash_join_probe_operator.h|   4 +-
 .../exec/partitioned_hash_join_sink_operator.cpp   | 112 +++--
 .../exec/partitioned_hash_join_sink_operator.h |   3 +-
 be/src/pipeline/exec/spill_sort_sink_operator.cpp  |  30 +-
 .../pipeline/exec/spill_sort_source_operator.cpp   |  27 +
 be/src/pipeline/exec/spill_sort_source_operator.h  |   1 -
 be/src/pipeline/exec/spill_utils.h |  76 ++
 11 files changed, 152 insertions(+), 237 deletions(-)

diff --git a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp 
b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp
index b833289e0e0..4399f3c7045 100644
--- a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp
+++ b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp
@@ -22,6 +22,7 @@
 
 #include "aggregation_sink_operator.h"
 #include "common/status.h"
+#include "pipeline/exec/spill_utils.h"
 #include "runtime/fragment_mgr.h"
 #include "vec/spill/spill_stream_manager.h"
 
@@ -253,14 +254,7 @@ Status PartitionedAggSinkLocalState::revoke_memory(RuntimeState* state) {
     }
     }};
 
-    auto execution_context = state->get_task_execution_context();
-    /// Resources in shared state will be released when the operator is closed,
-    /// but there may be asynchronous spilling tasks at this time, which can lead to conflicts.
-    /// So, we need hold the pointer of shared state.
-    std::weak_ptr shared_state_holder =
-            _shared_state->shared_from_this();
     auto query_id = state->query_id();
-    auto mem_tracker = state->get_query_ctx()->query_mem_tracker;
 
     MonotonicStopWatch submit_timer;
     submit_timer.start();
@@ -269,20 +263,10 @@ Status PartitionedAggSinkLocalState::revoke_memory(RuntimeState* state) {
                 "fault_inject partitioned_agg_sink revoke_memory submit_func failed");
         return status;
     });
-    status = ExecEnv::GetInstance()->spill_stream_mgr()->get_spill_io_thread_pool()->submit_func(
-            [this, &parent, state, query_id, mem_tracker, shared_state_holder, execution_context,
-             submit_timer] {
-                SCOPED_ATTACH_TASK_WITH_ID(mem_tracker, query_id);
-                std::shared_ptr execution_context_lock;
-                auto shared_state_sptr = shared_state_holder.lock();
-                if (shared_state_sptr) {
-                    execution_context_lock = execution_context.lock();
-                }
-                if (!shared_state_sptr || !execution_context_lock) {
-                    LOG(INFO) << "query " << print_id(query_id)
-                              << " execution_context released, maybe query was cancelled.";
-                    return Status::Cancelled("Cancelled");
-                }
+
+    auto spill_runnable = std::make_shared(
+            state, _shared_state->shared_from_this(),
+            [this, &parent, state, query_id, submit_timer] {
                 DBUG_EXECUTE_IF("fault_inject::partitioned_agg_sink::revoke_memory_cancel", {
                     auto st = Status::InternalError(
                             "fault_inject partitioned_agg_sink "
@@ -332,7 +316,9 @@ Status PartitionedAggSinkLocalState::revoke_memory(RuntimeState* state) {
                 parent._agg_sink_operator->reset_hash_table(runtime_state);
                 return Base::_shared_state->sink_status;
             });
-    return status;
+
+    return ExecEnv::GetInstance()->spill_stream_mgr()->get_spill_io_thread_pool()->submit(
+            std::move(spill_runnable));
 
 } // namespace doris::pipeline
diff --git a/be/src/pipeline/exec/partitioned_aggregation_source_operator.cpp 
b/be/src/pipeline/exec/partitioned_aggregation_source_operator.cpp
index fd609d95eef..a8c4e7b0bcc 100644
--- a/be/src/pipeline/exec/partitioned_aggregation_source_operator.cpp
+++ b/be/src/pipeline/exec/partitioned_aggregation_source_operator.cpp
@@ -23,6 +23,7 @@
 #include "common/exception.h"
 #include "common/status.h"
 #include "pipeline/exec/operator.h"
+#include "pipeline/exec/spill_utils.h"
 #include "runtime/fragment_mgr.h"
 #include "util/runtime_profile.h"
 #include "vec/spill/spill_stream_manager.h"
@@ -204,18 +205,11 @@ Status PartitionedAggLocalState::initiate_merge_spill_partition_agg_data(Runtime
 
         RETURN_IF_ERROR(Base::_shar

(doris) 13/44: [opt](hive) save hive table schema in transaction (#37008)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit fe21f7428133cee6d88831be79097072c2359a87
Author: wuwenchi 
AuthorDate: Mon Jul 1 11:56:58 2024 +0800

[opt](hive) save hive table schema in transaction (#37008)

Save the table schema in the transaction to reduce the number of HMS
calls and improve write performance. A sketch of the caching pattern
follows.
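
A hedged sketch of the memoization this commit applies (Java's Map.computeIfAbsent), shown here in C++; the types and the fetch function are illustrative, not the Doris classes. The first lookup per (db, table) pays the metastore round trip; every later partition committed in the same transaction reuses the cached copy.

```
#include <map>
#include <string>
#include <utility>
#include <vector>

struct FieldSchema { std::string name, type; };

std::vector<FieldSchema> fetch_schema_from_hms(const std::string& db, const std::string& tbl) {
    return {{"id", "bigint"}, {"name", "string"}}; // stand-in for the expensive HMS RPC
}

class TxnSchemaCache {
public:
    const std::vector<FieldSchema>& get(const std::string& db, const std::string& tbl) {
        auto key = std::make_pair(db, tbl);
        auto it = _cache.find(key);
        if (it == _cache.end()) {
            it = _cache.emplace(key, fetch_schema_from_hms(db, tbl)).first; // one RPC per table
        }
        return it->second;
    }

private:
    std::map<std::pair<std::string, std::string>, std::vector<FieldSchema>> _cache;
};
```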
---
 .../org/apache/doris/datasource/hive/HMSTransaction.java  | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
index d883b9dc786..bd0d2315c1e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java
@@ -48,6 +48,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import io.airlift.concurrent.MoreFutures;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
@@ -88,6 +89,7 @@ public class HMSTransaction implements Transaction {
     private final Map> tableActions = new HashMap<>();
     private final Map, Action>>
             partitionActions = new HashMap<>();
+    private final Map<DatabaseTableName, List<FieldSchema>> tableColumns = new HashMap<>();
 
 private final Executor fileSystemExecutor;
 private HmsCommitter hmsCommitter;
@@ -123,7 +125,7 @@ public class HMSTransaction implements Transaction {
 }
 }
 
-    private Set uncompletedMpuPendingUploads = new HashSet<>();
+    private final Set uncompletedMpuPendingUploads = new HashSet<>();
 
 public HMSTransaction(HiveMetadataOps hiveOps, FileSystemProvider 
fileSystemProvider, Executor fileSystemExecutor) {
 this.hiveOps = hiveOps;
@@ -241,7 +243,7 @@ public class HMSTransaction implements Transaction {
 Maps.newHashMap(),
 sd.getOutputFormat(),
 sd.getSerdeInfo().getSerializationLib(),
-hiveOps.getClient().getSchema(dbName, tbName)
+getTableColumns(dbName, tbName)
 );
 if (updateMode == TUpdateMode.OVERWRITE) {
 dropPartition(dbName, tbName, 
hivePartition.getPartitionValues(), true);
@@ -396,7 +398,7 @@ public class HMSTransaction implements Transaction {
 partition.getParameters(),
 sd.getOutputFormat(),
 sd.getSerdeInfo().getSerializationLib(),
-hiveOps.getClient().getSchema(dbName, tbName)
+getTableColumns(dbName, tbName)
 );
 
 partitionActionsForTable.put(
@@ -913,6 +915,11 @@ public class HMSTransaction implements Transaction {
 throw new RuntimeException("Not Found table: " + databaseName + "." + 
tableName);
 }
 
+    public synchronized List<FieldSchema> getTableColumns(String databaseName, String tableName) {
+        return tableColumns.computeIfAbsent(new DatabaseTableName(databaseName, tableName),
+                key -> hiveOps.getClient().getSchema(databaseName, tableName));
+    }
+}
+
 public synchronized void finishChangingExistingTable(
 ActionType actionType,
 String databaseName,
@@ -1276,7 +1283,7 @@ public class HMSTransaction implements Transaction {
 Maps.newHashMap(),
 sd.getOutputFormat(),
 sd.getSerdeInfo().getSerializationLib(),
-hiveOps.getClient().getSchema(dbName, tbName)
+getTableColumns(dbName, tbName)
 );
 
 HivePartitionWithStatistics partitionWithStats =





(doris) 12/44: [feature](function) support ip functions named ipv4_to_ipv6 and cut_ipv6 (#36883)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 6248c68f3cf6141cee98ad69f3f90842573c2bcc
Author: yangshijie 
AuthorDate: Mon Jul 1 11:34:35 2024 +0800

[feature](function) support ip functions named ipv4_to_ipv6 and cut_ipv6 
(#36883)

- ipv4_to_ipv6: accepts an IPv4 address and returns the corresponding
IPv6 address (see the sketch after this list).
- cut_ipv6: accepts an IPv6 address and returns the address in text
format with the specified number of bytes cut off.
- improve the UT framework to support parsing of IPv4 and IPv6 types.
- add some UTs and regression tests for these two functions.
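
A hedged sketch of the conversion ipv4_to_ipv6 performs: an IPv4 address a.b.c.d maps to the IPv4-mapped IPv6 address ::ffff:a.b.c.d (RFC 4291), i.e. ten zero bytes, two 0xFF bytes, then the four IPv4 bytes. Names below are illustrative; this is not the BE implementation.

```
#include <array>
#include <cstdint>
#include <cstdio>

std::array<uint8_t, 16> ipv4_mapped_ipv6(uint32_t ipv4) { // ipv4 in host byte order
    std::array<uint8_t, 16> v6{}; // bytes 0..9 stay zero
    v6[10] = 0xFF;
    v6[11] = 0xFF;
    for (int i = 0; i < 4; ++i) {
        v6[12 + i] = static_cast<uint8_t>(ipv4 >> (24 - 8 * i)); // textual (big-endian) order
    }
    return v6;
}

int main() {
    auto v6 = ipv4_mapped_ipv6(0xC0A80001); // 192.168.0.1 -> ::ffff:192.168.0.1
    for (auto b : v6) std::printf("%02x", b);
    std::printf("\n"); // prints 00000000000000000000ffffc0a80001
}
```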
---
 be/src/vec/functions/function_ip.cpp   |   6 +
 be/src/vec/functions/function_ip.h | 137 +
 be/test/vec/function/function_ip_test.cpp  |  75 +++
 be/test/vec/function/function_test_util.cpp|  16 +++
 be/test/vec/function/function_test_util.h  |   3 +
 .../doris/catalog/BuiltinScalarFunctions.java  |  16 ++-
 .../expressions/functions/scalar/CutIpv6.java  |  67 ++
 .../expressions/functions/scalar/Ipv4ToIpv6.java   |  65 ++
 .../expressions/visitor/ScalarFunctionVisitor.java |  58 +
 gensrc/script/doris_builtins_functions.py  |   2 +
 .../ip_functions/test_cut_ipv6_function.out|  19 +++
 .../ip_functions/test_ipv4_to_ipv6_function.out|  14 +++
 .../ip_functions/test_cut_ipv6_function.groovy |  56 +
 .../ip_functions/test_ipv4_to_ipv6_function.groovy |  51 
 14 files changed, 555 insertions(+), 30 deletions(-)

diff --git a/be/src/vec/functions/function_ip.cpp 
b/be/src/vec/functions/function_ip.cpp
index dbb715ee7f6..ae5a2399981 100644
--- a/be/src/vec/functions/function_ip.cpp
+++ b/be/src/vec/functions/function_ip.cpp
@@ -58,5 +58,11 @@ void register_function_ip(SimpleFunctionFactory& factory) {
     factory.register_function>();
     factory.register_function>();
     factory.register_function>();
+
+    /// Convert between IPv4 and IPv6 part
+    factory.register_function<FunctionIPv4ToIPv6>();
+
+    /// Cut IPv6 part
+    factory.register_function<FunctionCutIPv6>();
 }
 } // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/function_ip.h 
b/be/src/vec/functions/function_ip.h
index 69c7a20e896..3b02d779246 100644
--- a/be/src/vec/functions/function_ip.h
+++ b/be/src/vec/functions/function_ip.h
@@ -25,13 +25,16 @@
 #include 
 
 #include "vec/columns/column.h"
+#include "vec/columns/column_const.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_string.h"
 #include "vec/columns/column_struct.h"
 #include "vec/columns/column_vector.h"
 #include "vec/columns/columns_number.h"
+#include "vec/common/assert_cast.h"
 #include "vec/common/format_ip.h"
 #include "vec/common/ipv6_to_binary.h"
+#include "vec/common/unaligned.h"
 #include "vec/core/column_with_type_and_name.h"
 #include "vec/core/types.h"
 #include "vec/data_types/data_type.h"
@@ -1159,4 +1162,138 @@ public:
 }
 };
 
+class FunctionIPv4ToIPv6 : public IFunction {
+public:
+    static constexpr auto name = "ipv4_to_ipv6";
+    static FunctionPtr create() { return std::make_shared<FunctionIPv4ToIPv6>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeIPv6>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) const override {
+        const auto& ipv4_column_with_type_and_name = block.get_by_position(arguments[0]);
+        const auto& [ipv4_column, ipv4_const] =
+                unpack_if_const(ipv4_column_with_type_and_name.column);
+        const auto* ipv4_addr_column = assert_cast<const ColumnIPv4*>(ipv4_column.get());
+        const auto& ipv4_column_data = ipv4_addr_column->get_data();
+        auto col_res = ColumnIPv6::create(input_rows_count, 0);
+        auto& col_res_data = col_res->get_data();
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            auto ipv4_idx = index_check_const(i, ipv4_const);
+            map_ipv4_to_ipv6(ipv4_column_data[ipv4_idx],
+                             reinterpret_cast<UInt8*>(&col_res_data[i]));
+        }
+
+        block.replace_by_position(result, std::move(col_res));
+        return Status::OK();
+    }
+
+private:
+    static void map_ipv4_to_ipv6(IPv4 ipv4, UInt8* buf) {
+        unaligned_store<UInt64>(buf, 0x0000FFFF00000000ULL | static_cast<UInt64>(ipv4));
+        unaligned_store<UInt64>(buf + 8, 0);
+    }
+};
+
+class FunctionCutIPv6 : public IFunction {
+public:
+static constexpr auto name = "cut_ipv6";
+static FunctionPtr create() { return std::make_shared(); }
+
+String get_name() const ov

(doris) 14/44: [opt](function)avoid virtual function calls in geo functions (#37003)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 9d61bebee694b8d18908ffbc2f6596865f8a533b
Author: Mryange <59914473+mrya...@users.noreply.github.com>
AuthorDate: Mon Jul 1 12:53:32 2024 +0800

[opt](function)avoid virtual function calls in geo functions (#37003)
---
 be/src/vec/functions/functions_geo.cpp | 285 +
 be/src/vec/functions/functions_geo.h   |   5 +-
 2 files changed, 189 insertions(+), 101 deletions(-)

diff --git a/be/src/vec/functions/functions_geo.cpp 
b/be/src/vec/functions/functions_geo.cpp
index 036033db2a2..b389bc1636e 100644
--- a/be/src/vec/functions/functions_geo.cpp
+++ b/be/src/vec/functions/functions_geo.cpp
@@ -26,6 +26,7 @@
 #include "geo/geo_common.h"
 #include "geo/geo_types.h"
 #include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
 #include "vec/columns/columns_number.h"
 #include "vec/common/string_ref.h"
 #include "vec/core/block.h"
@@ -33,6 +34,7 @@
 #include "vec/core/field.h"
 #include "vec/data_types/data_type_nullable.h"
 #include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
 #include "vec/functions/simple_function_factory.h"
 
 namespace doris::vectorized {
@@ -41,6 +43,7 @@ struct StPoint {
 static constexpr auto NEED_CONTEXT = false;
 static constexpr auto NAME = "st_point";
 static const size_t NUM_ARGS = 2;
+using Type = DataTypeString;
     static Status execute(Block& block, const ColumnNumbers& arguments, size_t result) {
         DCHECK_EQ(arguments.size(), 2);
         auto return_type = block.get_data_type(result);
@@ -52,26 +55,29 @@ struct StPoint {
 
         const auto size = std::max(left_column->size(), right_column->size());
 
-        MutableColumnPtr res = return_type->create_column();
-
+        auto res = ColumnString::create();
+        auto null_map = ColumnUInt8::create(size, 0);
+        auto& null_map_data = null_map->get_data();
         GeoPoint point;
         std::string buf;
         if (left_const) {
-            const_vector(left_column, right_column, res, size, point, buf);
+            const_vector(left_column, right_column, res, null_map_data, size, point, buf);
         } else if (right_const) {
-            vector_const(left_column, right_column, res, size, point, buf);
+            vector_const(left_column, right_column, res, null_map_data, size, point, buf);
         } else {
-            vector_vector(left_column, right_column, res, size, point, buf);
+            vector_vector(left_column, right_column, res, null_map_data, size, point, buf);
         }
 
-        block.replace_by_position(result, std::move(res));
+        block.replace_by_position(result,
+                                  ColumnNullable::create(std::move(res), std::move(null_map)));
         return Status::OK();
     }
 
-    static void loop_do(GeoParseStatus& cur_res, MutableColumnPtr& res, GeoPoint& point,
-                        std::string& buf) {
+    static void loop_do(GeoParseStatus& cur_res, ColumnString::MutablePtr& res, NullMap& null_map,
+                        int row, GeoPoint& point, std::string& buf) {
         if (cur_res != GEO_PARSE_OK) {
-            res->insert_data(nullptr, 0);
+            null_map[row] = 1;
+            res->insert_default();
             return;
         }
 
@@ -81,32 +87,32 @@ struct StPoint {
     }
 
     static void const_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
-                             MutableColumnPtr& res, const size_t size, GeoPoint& point,
-                             std::string& buf) {
+                             ColumnString::MutablePtr& res, NullMap& null_map, const size_t size,
+                             GeoPoint& point, std::string& buf) {
         double x = left_column->operator[](0).get();
         for (int row = 0; row < size; ++row) {
             auto cur_res = point.from_coord(x, right_column->operator[](row).get());
-            loop_do(cur_res, res, point, buf);
+            loop_do(cur_res, res, null_map, row, point, buf);
         }
     }
 
     static void vector_const(const ColumnPtr& left_column, const ColumnPtr& right_column,
-                             MutableColumnPtr& res, const size_t size, GeoPoint& point,
-                             std::string& buf) {
+                             ColumnString::MutablePtr& res, NullMap& null_map, const size_t size,
+                             GeoPoint& point, std::string& buf) {
         double y = right_column->operator[](0).get();
         for (int row = 0; row < size; ++row) {
             auto cur_res = point.from_coord(right_column->operator[](row).get(), y);
-            loop_do(cur_res, res, point, buf);
+            loop_do(cur_res, res, null_map, row, point, buf);
         }
     }
 
     static void vector_vector(const ColumnPtr& left_c

(doris) 21/44: [Fix](regression) Fix p0 case `test_unique_table_auto_inc_concurrent` (#37048)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit c97839a240f44a2e5f72ce2f3c35b865d922c217
Author: bobhan1 
AuthorDate: Mon Jul 1 15:11:53 2024 +0800

[Fix](regression) Fix p0 case `test_unique_table_auto_inc_concurrent` (#37048)

Since we cannot wait for all transactions to finish publishing for insert
statements, we just check the uniqueness of the generated auto-increment
values.
---
 .../data/data_model_p0/unique/test_unique_auto_inc_concurrent.out  | 3 ---
 .../suites/data_model_p0/unique/test_unique_auto_inc_concurrent.groovy | 3 ++-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git 
a/regression-test/data/data_model_p0/unique/test_unique_auto_inc_concurrent.out 
b/regression-test/data/data_model_p0/unique/test_unique_auto_inc_concurrent.out
index 03819c9a717..c803ca86189 100644
--- 
a/regression-test/data/data_model_p0/unique/test_unique_auto_inc_concurrent.out
+++ 
b/regression-test/data/data_model_p0/unique/test_unique_auto_inc_concurrent.out
@@ -1,10 +1,7 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
 -- !sql --
-150150
 
 -- !sql --
-300300
 
 -- !sql --
-450450
 
diff --git 
a/regression-test/suites/data_model_p0/unique/test_unique_auto_inc_concurrent.groovy
 
b/regression-test/suites/data_model_p0/unique/test_unique_auto_inc_concurrent.groovy
index bf6d584b2af..4793fb52343 100644
--- 
a/regression-test/suites/data_model_p0/unique/test_unique_auto_inc_concurrent.groovy
+++ 
b/regression-test/suites/data_model_p0/unique/test_unique_auto_inc_concurrent.groovy
@@ -46,8 +46,9 @@ suite("test_unique_table_auto_inc_concurrent") {
 }
 
 threads.each { thread -> thread.join() }
+sql "sync"
 
-qt_sql "select count(id), count(distinct id) from ${table1};"
+qt_sql "select id, count(*) from ${table1} group by id having count(*) 
> 1;"
 }
 
 run_test(15, 1, 10)





(doris) 23/44: [opt](catalog) add some profile for parquet reader and change meta cache config (#37040)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 97f2da58331ced6ab51f19e4353ff53f0e3e8039
Author: Mingyu Chen 
AuthorDate: Mon Jul 1 15:32:31 2024 +0800

[opt](catalog) add some profile for parquet reader and change meta cache 
config (#37040)

## Proposed changes

This PR mainly changes:
1. Add new BE config `enable_parquet_page_index`.
Default is true; if set to false, the parquet reader will not use the
page index to filter data.
This is only for debug purposes, in case the page index ever filters
out the wrong data.

2. Add new FE config `max_hive_partition_table_cache_num`.
Separated from `max_hive_table_cache_num`. This config sets the max
cache number of `partitionValuesCache`, which caches the partition
value lists of partitioned Hive tables.

3. Reduce the default expire time of the meta cache from 86400 to 24400.
For a Hive table partitioned by day, an expire time of 86400 means
queries may keep fetching stale cache values for up to a full day after
a new daily partition appears (see the sketch below).
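
An illustrative sketch (not the Doris cache code) of the staleness problem with an expire-after-write TTL: a partition list cached shortly after midnight keeps being served until the entry expires, hiding today's newly added partition for up to the full TTL. Lowering the TTL trades a few extra metastore calls for bounded staleness.

```
#include <chrono>
#include <optional>
#include <string>
#include <unordered_map>

template <typename V>
class TtlCache {
public:
    explicit TtlCache(std::chrono::seconds ttl) : _ttl(ttl) {}

    std::optional<V> get(const std::string& key) {
        auto it = _map.find(key);
        if (it == _map.end()) return std::nullopt;
        if (std::chrono::steady_clock::now() - it->second.written > _ttl) {
            _map.erase(it); // expired: caller reloads from the metastore
            return std::nullopt;
        }
        return it->second.value; // possibly stale, but served until the TTL lapses
    }

    void put(const std::string& key, V value) {
        _map[key] = Entry{std::move(value), std::chrono::steady_clock::now()};
    }

private:
    struct Entry {
        V value;
        std::chrono::steady_clock::time_point written;
    };
    std::chrono::seconds _ttl;
    std::unordered_map<std::string, Entry> _map;
};
// e.g. TtlCache<std::string> partition_values(std::chrono::seconds(24400));
```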
---
 be/src/common/config.cpp   |  5 +
 be/src/common/config.h |  2 ++
 be/src/vec/exec/format/parquet/vparquet_reader.cpp | 23 +-
 be/src/vec/exec/format/parquet/vparquet_reader.h   |  4 
 .../main/java/org/apache/doris/common/Config.java  | 13 
 .../doris/datasource/hive/HiveMetaStoreCache.java  |  6 +++---
 6 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index c2274fd169b..7166b39dda8 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1313,6 +1313,11 @@ DEFINE_Bool(enable_file_logger, "true");
 // The minimum row group size when exporting Parquet files. default 128MB
 DEFINE_Int64(min_row_group_size, "134217728");
 
+// If set to false, the parquet reader will not use page index to filter data.
+// This is only for debug purpose, in case sometimes the page index
+// filter wrong data.
+DEFINE_mBool(enable_parquet_page_index, "true");
+
 // clang-format off
 #ifdef BE_TEST
 // test s3
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 9920b65fe52..dbb5b716b78 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1399,6 +1399,8 @@ DECLARE_Bool(enable_file_logger);
 // The minimum row group size when exporting Parquet files.
 DECLARE_Int64(min_row_group_size);
 
+DECLARE_mBool(enable_parquet_page_index);
+
 #ifdef BE_TEST
 // test s3
 DECLARE_String(test_s3_resource);
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index 629f272ef72..25421d80b0e 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -148,6 +148,10 @@ void ParquetReader::_init_profile() {
             ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "FileNum", TUnit::UNIT, parquet_profile, 1);
     _parquet_profile.page_index_filter_time =
             ADD_CHILD_TIMER_WITH_LEVEL(_profile, "PageIndexFilterTime", parquet_profile, 1);
+    _parquet_profile.read_page_index_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "PageIndexReadTime", parquet_profile, 1);
+    _parquet_profile.parse_page_index_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "PageIndexParseTime", parquet_profile, 1);
     _parquet_profile.row_group_filter_time =
             ADD_CHILD_TIMER_WITH_LEVEL(_profile, "RowGroupFilterTime", parquet_profile, 1);
@@ -747,25 +751,32 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
         return Status::OK();
     }
     PageIndex page_index;
-    if (!_has_page_index(row_group.columns, page_index)) {
+    if (!config::enable_parquet_page_index || !_has_page_index(row_group.columns, page_index)) {
         read_whole_row_group();
         return Status::OK();
     }
     std::vector col_index_buff(page_index._column_index_size);
     size_t bytes_read = 0;
     Slice result(col_index_buff.data(), page_index._column_index_size);
-    RETURN_IF_ERROR(
-            _file_reader->read_at(page_index._column_index_start, result, &bytes_read, _io_ctx));
+    {
+        SCOPED_RAW_TIMER(&_statistics.read_page_index_time);
+        RETURN_IF_ERROR(_file_reader->read_at(page_index._column_index_start, result, &bytes_read,
+                                              _io_ctx));
+    }
     _column_statistics.read_bytes += bytes_read;
     auto& schema_desc = _file_metadata->schema();
     std::vector skipped_row_ranges;
     std::vector off_index_buff(page_index._offset_index_size);
     Slice res(off_index_buff.data(), page_index._offset_index_size);
-    RETURN_IF_ERROR(
-            _file_reader->read_at(page_index._offset_index_start, 
(doris) 25/44: [profile](fe)update format of min/hour/sec unit in profile (#37010)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit ee516656cf447188b51c8d245c26ecfef4e9b247
Author: minghong 
AuthorDate: Mon Jul 1 16:27:25 2024 +0800

[profile](fe)update format of min/hour/sec unit in profile (#37010)

## Proposed changes
This helps us find, in the profile, which operator consumes a long time
(see the worked example below).

Issue Number: close #xxx


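A worked example of the new format, as a standalone sketch (not the actual DebugUtil code): 21660222 ms = 6 * 3600000 + 1 * 60000 + 222, so it now renders as "6hour1min" instead of "6h1m"; milliseconds and seconds are dropped once the value contains whole hours or minutes, matching the updated unit tests.

```
#include <cstdint>
#include <iostream>
#include <string>

std::string pretty_ms(int64_t ms) {
    constexpr int64_t HOUR = 3600 * 1000, MINUTE = 60 * 1000, SECOND = 1000;
    std::string out;
    bool hour = false, minute = false;
    if (ms >= HOUR) { out += std::to_string(ms / HOUR) + "hour"; ms %= HOUR; hour = true; }
    if (ms >= MINUTE) { out += std::to_string(ms / MINUTE) + "min"; ms %= MINUTE; minute = true; }
    if (!hour && ms >= SECOND) { out += std::to_string(ms / SECOND) + "sec"; ms %= SECOND; }
    if (!hour && !minute) { out += std::to_string(ms) + "ms"; }
    return out;
}

int main() {
    std::cout << pretty_ms(21660222) << "\n"; // 6hour1min
    std::cout << pretty_ms(2222) << "\n";     // 2sec222ms
    std::cout << pretty_ms(22) << "\n";       // 22ms
}
```
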
---
 .../src/main/java/org/apache/doris/common/util/DebugUtil.java | 6 +++---
 .../src/test/java/org/apache/doris/common/util/DebugUtilTest.java | 8 
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/DebugUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/DebugUtil.java
index 2a52420a96d..937c74cac66 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/DebugUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/DebugUtil.java
@@ -71,17 +71,17 @@ public class DebugUtil {
 boolean hour = false;
 boolean minute = false;
 if (newValue >= HOUR) {
-builder.append(newValue / HOUR).append("h");
+builder.append(newValue / HOUR).append("hour");
 newValue %= HOUR;
 hour = true;
 }
 if (newValue >= MINUTE) {
-builder.append(newValue / MINUTE).append("m");
+builder.append(newValue / MINUTE).append("min");
 newValue %= MINUTE;
 minute = true;
 }
 if (!hour && newValue >= SECOND) {
-builder.append(newValue / SECOND).append("s");
+builder.append(newValue / SECOND).append("sec");
 newValue %= SECOND;
 }
 if (!hour && !minute) {
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/util/DebugUtilTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/common/util/DebugUtilTest.java
index 54a3f4c388b..aa599783f18 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/common/util/DebugUtilTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/DebugUtilTest.java
@@ -50,16 +50,16 @@ public class DebugUtilTest {
     @Test
     public void testGetPrettyStringMs() {
         // 6hour1min
-        Assert.assertEquals(DebugUtil.getPrettyStringMs(21660222), "6h1m");
+        Assert.assertEquals("6hour1min", DebugUtil.getPrettyStringMs(21660222));
 
         // 1min222ms
-        Assert.assertEquals(DebugUtil.getPrettyStringMs(60222), "1m");
+        Assert.assertEquals("1min", DebugUtil.getPrettyStringMs(60222));
 
         // 2s222ms
-        Assert.assertEquals(DebugUtil.getPrettyStringMs(2222), "2s222ms");
+        Assert.assertEquals("2sec222ms", DebugUtil.getPrettyStringMs(2222));
 
         // 22ms
-        Assert.assertEquals(DebugUtil.getPrettyStringMs(22), "22ms");
+        Assert.assertEquals("22ms", DebugUtil.getPrettyStringMs(22));
     }
 
 @Test





(doris) 05/44: [Exec](agg) Fix agg limit result error (#37025)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 1ed52cdeb8cecc394d39609b93ad8ef3d9f2ba0f
Author: HappenLee 
AuthorDate: Mon Jul 1 09:49:04 2024 +0800

[Exec](agg) Fix agg limit result error (#37025)

This PR should be merged before #34853 is merged.
---
 be/src/pipeline/dependency.cpp   | 10 ++
 be/src/pipeline/dependency.h |  3 ++-
 be/src/pipeline/exec/aggregation_sink_operator.cpp   |  4 +++-
 be/src/pipeline/exec/aggregation_source_operator.cpp |  8 +++-
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/be/src/pipeline/dependency.cpp b/be/src/pipeline/dependency.cpp
index 68c00af409d..4938883062a 100644
--- a/be/src/pipeline/dependency.cpp
+++ b/be/src/pipeline/dependency.cpp
@@ -248,7 +248,8 @@ void AggSharedState::build_limit_heap(size_t hash_table_size) {
     limit_columns_min = limit_heap.top()._row_id;
 }
 
-bool AggSharedState::do_limit_filter(vectorized::Block* block, size_t num_rows) {
+bool AggSharedState::do_limit_filter(vectorized::Block* block, size_t num_rows,
+                                     const std::vector* key_locs) {
     if (num_rows) {
         cmp_res.resize(num_rows);
         need_computes.resize(num_rows);
@@ -257,9 +258,10 @@ bool AggSharedState::do_limit_filter(vectorized::Block* block, size_t num_rows)
 
         const auto key_size = null_directions.size();
         for (int i = 0; i < key_size; i++) {
-            block->get_by_position(i).column->compare_internal(
-                    limit_columns_min, *limit_columns[i], null_directions[i], order_directions[i],
-                    cmp_res, need_computes.data());
+            block->get_by_position(key_locs ? key_locs->operator[](i) : i)
+                    .column->compare_internal(limit_columns_min, *limit_columns[i],
+                                              null_directions[i], order_directions[i], cmp_res,
+                                              need_computes.data());
         }
 
         auto set_computes_arr = [](auto* __restrict res, auto* __restrict computes, int rows) {
diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h
index 5214022db13..8adc24d3b4e 100644
--- a/be/src/pipeline/dependency.h
+++ b/be/src/pipeline/dependency.h
@@ -311,7 +311,8 @@ public:
 
 Status reset_hash_table();
 
-bool do_limit_filter(vectorized::Block* block, size_t num_rows);
+bool do_limit_filter(vectorized::Block* block, size_t num_rows,
+ const std::vector* key_locs = nullptr);
 void build_limit_heap(size_t hash_table_size);
 
 // We should call this function only at 1st phase.
diff --git a/be/src/pipeline/exec/aggregation_sink_operator.cpp 
b/be/src/pipeline/exec/aggregation_sink_operator.cpp
index fae987394b4..1dab1669dd5 100644
--- a/be/src/pipeline/exec/aggregation_sink_operator.cpp
+++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp
@@ -329,6 +329,7 @@ Status AggSinkLocalState::_merge_with_serialized_key_helper(vectorized::Block* b
         if (limit) {
             need_do_agg = _emplace_into_hash_table_limit(_places.data(), block, key_locs,
                                                          key_columns, rows);
+            rows = block->rows();
         } else {
             _emplace_into_hash_table(_places.data(), key_columns, rows);
         }
@@ -589,7 +590,8 @@ bool AggSinkLocalState::_emplace_into_hash_table_limit(vectorized::AggregateData
             bool need_filter = false;
             {
                 SCOPED_TIMER(_hash_table_limit_compute_timer);
-                need_filter = _shared_state->do_limit_filter(block, num_rows);
+                need_filter =
+                        _shared_state->do_limit_filter(block, num_rows, &key_locs);
             }
 
             auto& need_computes = _shared_state->need_computes;
diff --git a/be/src/pipeline/exec/aggregation_source_operator.cpp 
b/be/src/pipeline/exec/aggregation_source_operator.cpp
index 5b371877f36..1b7a151e2af 100644
--- a/be/src/pipeline/exec/aggregation_source_operator.cpp
+++ b/be/src/pipeline/exec/aggregation_source_operator.cpp
@@ -452,8 +452,14 @@ void AggLocalState::do_agg_limit(vectorized::Block* block, bool* eos) {
     if (_shared_state->reach_limit) {
         if (_shared_state->do_sort_limit && _shared_state->do_limit_filter(block, block->rows())) {
             vectorized::Block::filter_block_internal(block, _shared_state->need_computes);
+            if (auto rows = block->rows()) {
+                _num_rows_returned += rows;
+                COUNTER_UPDATE(_blocks_returned_counter, 1);
+                COUNTER_SET(_rows_returned_counter, _num_rows_returned);
+            }
+        } else {
+            reached_limit(bl

(doris) 09/44: [fix](local shuffle) Fix wrong partitioned expr in local exchanger (#37017)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit caac4b7cdb78e738956c545fac35cde77b32ddcc
Author: Gabriel 
AuthorDate: Mon Jul 1 10:02:03 2024 +0800

[fix](local shuffle) Fix wrong partitioned expr in local exchanger (#37017)

Currently, the partitioned expressions in a HASH_SHUFFLE local exchanger may be wrong. This PR fixes that.
---
 be/src/pipeline/exec/hashjoin_build_sink.h | 4 ++--
 be/src/pipeline/exec/hashjoin_probe_operator.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/be/src/pipeline/exec/hashjoin_build_sink.h 
b/be/src/pipeline/exec/hashjoin_build_sink.h
index d785c20ee7f..fad03f0a78d 100644
--- a/be/src/pipeline/exec/hashjoin_build_sink.h
+++ b/be/src/pipeline/exec/hashjoin_build_sink.h
@@ -148,8 +148,8 @@ public:
 return _join_distribution == TJoinDistributionType::PARTITIONED;
 }
 bool require_data_distribution() const override {
-return _join_distribution == TJoinDistributionType::COLOCATE ||
-   _join_distribution == TJoinDistributionType::BUCKET_SHUFFLE;
+return _join_distribution != TJoinDistributionType::BROADCAST &&
+   _join_distribution != TJoinDistributionType::NONE;
 }
 
 private:
diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.h 
b/be/src/pipeline/exec/hashjoin_probe_operator.h
index b8bc892ef31..0b4298f55ff 100644
--- a/be/src/pipeline/exec/hashjoin_probe_operator.h
+++ b/be/src/pipeline/exec/hashjoin_probe_operator.h
@@ -159,8 +159,8 @@ public:
 return _join_distribution == TJoinDistributionType::PARTITIONED;
 }
 bool require_data_distribution() const override {
-return _join_distribution == TJoinDistributionType::COLOCATE ||
-   _join_distribution == TJoinDistributionType::BUCKET_SHUFFLE;
+return _join_distribution != TJoinDistributionType::BROADCAST &&
+   _join_distribution != TJoinDistributionType::NONE;
 }
 
 private:





(doris) 27/44: [feat](nereids) support explain delete from clause (#36782)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit fc78d1fd2885d7b68f0c6953a34b7b5261b0f9f9
Author: minghong 
AuthorDate: Mon Jul 1 16:28:44 2024 +0800

[feat](nereids) support explain delete from clause (#36782)

## Proposed changes
Support EXPLAIN on delete statements, e.g.:
explain delete from T where A = 1

Issue Number: close #xxx


---
 .../doris/nereids/parser/LogicalPlanBuilder.java   |  16 ++-
 .../trees/plans/commands/DeleteFromCommand.java|  92 +++-
 .../plans/commands/DeleteFromUsingCommand.java |  88 +++
 .../suites/nereids_p0/explain/explain_dml.groovy   | 122 +
 4 files changed, 236 insertions(+), 82 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index b6f679da5e4..e53177a0358 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -941,9 +941,12 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor {
         if (ctx.tableAlias().strictIdentifier() != null) {
             tableAlias = ctx.tableAlias().getText();
         }
-        if (ctx.USING() == null && ctx.cte() == null && ctx.explain() == null) {
+
+        Command deleteCommand;
+        if (ctx.USING() == null && ctx.cte() == null) {
             query = withFilter(query, Optional.ofNullable(ctx.whereClause()));
-            return new DeleteFromCommand(tableName, tableAlias, partitionSpec.first, partitionSpec.second, query);
+            deleteCommand = new DeleteFromCommand(tableName, tableAlias, partitionSpec.first,
+                    partitionSpec.second, query);
 } else {
 // convert to insert into select
 query = withRelations(query, ctx.relations().relation());
@@ -952,8 +955,13 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor {
             if (ctx.cte() != null) {
                 cte = Optional.ofNullable(withCte(query, ctx.cte()));
             }
-            return withExplain(new DeleteFromUsingCommand(tableName, tableAlias,
-                    partitionSpec.first, partitionSpec.second, query, cte), ctx.explain());
+            deleteCommand = new DeleteFromUsingCommand(tableName, tableAlias,
+                    partitionSpec.first, partitionSpec.second, query, cte);
+        }
+        if (ctx.explain() != null) {
+            return withExplain(deleteCommand, ctx.explain());
+        } else {
+            return deleteCommand;
         }
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java
index 90d159b8274..cbfa94be050 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java
@@ -28,11 +28,15 @@ import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.MaterializedIndexMeta;
 import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.ErrorCode;
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.nereids.NereidsPlanner;
+import org.apache.doris.nereids.analyzer.UnboundAlias;
 import org.apache.doris.nereids.analyzer.UnboundRelation;
+import org.apache.doris.nereids.analyzer.UnboundSlot;
+import org.apache.doris.nereids.analyzer.UnboundTableSinkCreator;
 import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.glue.LogicalPlanAdapter;
 import org.apache.doris.nereids.rules.RuleType;
@@ -41,12 +45,17 @@ import 
org.apache.doris.nereids.trees.expressions.ComparisonPredicate;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.InPredicate;
 import org.apache.doris.nereids.trees.expressions.IsNull;
+import org.apache.doris.nereids.trees.expressions.NamedExpression;
 import org.apache.doris.nereids.trees.expressions.Not;
 import org.apache.doris.nereids.trees.expressions.SlotReference;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
+import org.apache.doris.nereids.trees.expressions.literal.TinyIntLiteral;
+import org.apache.doris.nereids.trees.plans.Explainable;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.PlanType;
+import org.apache.doris.nereids.trees.plans.commands.info.DMLCommandType;
 import org.apache.doris.nereids.trees.pla

(doris) 26/44: [fix](nereids) ColStatsMeta.partitionUpdateRows npe (#37044)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 9cd6667e19bf7d0292f1d1b8d54bc0159293bd4d
Author: minghong 
AuthorDate: Mon Jul 1 16:28:10 2024 +0800

[fix](nereids) ColStatsMeta.partitionUpdateRows npe (#37044)

When FE is upgraded from an old version, the colMeta object may be
deserialized from JSON, and colMeta.partitionUpdateRows could be null.

## Proposed changes

Issue Number: close #xxx


---
 .../src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 6648e9888a9..f83521ae031 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -312,7 +312,9 @@ public class StatsCalculator extends DefaultPlanVisitor {
             if (tableMeta != null) {
                 ColStatsMeta colMeta = tableMeta.findColumnStatsMeta(
                         olapScan.getTable().getIndexNameById(olapScan.getSelectedIndexId()), slot.getName());
-                if (colMeta != null) {
+                if (colMeta != null && colMeta.partitionUpdateRows != null) {
+                    // when fe upgraded from old version, colMeta object may be deserialized from json,
+                    // and colMeta.partitionUpdateRows could be null
                     if (olapScan.getSelectedPartitionIds().isEmpty()) {
                         deltaRowCount = tableMeta.updatedRows.get() - colMeta.updatedRows;
 } else {





(doris) 07/44: [chore](Regression) Remove useless get provider code in regression framework (#37000)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit f0c10fa57a8e4c37d3798bf518d5f49d6aa6d28b
Author: AlexYue 
AuthorDate: Mon Jul 1 09:59:48 2024 +0800

[chore](Regression) Remove useless get provider code in regression 
framework (#37000)

The getProvider helper and its check logic are unused, so remove them.
---
 .../main/groovy/org/apache/doris/regression/Config.groovy | 13 +
 .../groovy/org/apache/doris/regression/suite/Suite.groovy | 15 ---
 2 files changed, 1 insertion(+), 27 deletions(-)

diff --git 
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy
 
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy
index 008962ee544..c6711184c01 100644
--- 
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy
+++ 
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy
@@ -564,16 +564,6 @@ class Config {
 return config
 }
 
-static String getProvider(String endpoint) {
-def providers = ["cos", "oss", "s3", "obs", "bos"]
-for (final def provider in providers) {
-if (endpoint.containsIgnoreCase(provider)) {
-return provider
-}
-}
-return ""
-}
-
 static void checkCloudSmokeEnv(Properties properties) {
 // external stage obj info
 String s3Endpoint = properties.getOrDefault("s3Endpoint", "")
@@ -589,8 +579,7 @@ class Config {
 s3EndpointConf:s3Endpoint,
 s3BucketConf:s3BucketName,
 s3AKConf:s3AK,
-s3SKConf:s3SK,
-s3ProviderConf:getProvider(s3Endpoint)
+s3SKConf:s3SK
 ]
 for (final def item in items) {
 if (item.value == null || item.value.isEmpty()) {
diff --git 
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
 
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
index b3a2e958ff0..3397ab4ccfc 100644
--- 
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
+++ 
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy
@@ -894,21 +894,6 @@ class Suite implements GroovyInterceptable {
 return;
 }
 
-String getProvider() {
-String s3Endpoint = context.config.otherConfigs.get("s3Endpoint")
-return getProvider(s3Endpoint)
-}
-
-String getProvider(String endpoint) {
-def providers = ["cos", "oss", "s3", "obs", "bos"]
-for (final def provider in providers) {
-if (endpoint.containsIgnoreCase(provider)) {
-return provider
-}
-}
-return ""
-}
-
 int getTotalLine(String filePath) {
 def file = new File(filePath)
 int lines = 0;


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) 08/44: [regression-test](connector) Add a case for the response of streamload that the connector depends (#36864)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 145afabedc81f4029c7ae35d3b71016956cf87c6
Author: wudi <676366...@qq.com>
AuthorDate: Mon Jul 1 10:01:41 2024 +0800

[regression-test](connector) Add a case for the stream load responses that the connector depends on (#36864)
---
 .../data/flink_connector_p0/test_response.csv  |   2 +
 .../flink_connector_response.groovy| 186 +
 2 files changed, 188 insertions(+)

diff --git a/regression-test/data/flink_connector_p0/test_response.csv 
b/regression-test/data/flink_connector_p0/test_response.csv
new file mode 100644
index 000..b9fc6ccd99c
--- /dev/null
+++ b/regression-test/data/flink_connector_p0/test_response.csv
@@ -0,0 +1,2 @@
+1,zhangsan
+2,lisi
diff --git 
a/regression-test/suites/flink_connector_p0/flink_connector_response.groovy 
b/regression-test/suites/flink_connector_p0/flink_connector_response.groovy
new file mode 100644
index 000..c5aa754a52a
--- /dev/null
+++ b/regression-test/suites/flink_connector_p0/flink_connector_response.groovy
@@ -0,0 +1,186 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License./
+
+import java.util.regex.Pattern
+
+/**
+ * Flink connector depends on these responses
+ */
+suite("flink_connector_response") {
+
+    def LABEL_EXIST_PATTERN =
+            Pattern.compile("Label \\[(.*)\\] has already been used, relate to txn \\[(\\d+)\\]");
+    def COMMITTED_PATTERN =
+            Pattern.compile(
+                    "transaction \\[(\\d+)\\] is already \\b(COMMITTED|committed|VISIBLE|visible)\\b, not pre-committed.");
+    def ABORTTED_PATTERN =
+            Pattern.compile(
+                    "transaction \\[(\\d+)\\] is already|transaction \\[(\\d+)\\] not found");
+
+def thisDb = sql """select database()""";
+thisDb = thisDb[0][0];
+logger.info("current database is ${thisDb}");
+
+def tableName = "test_response"
+sql """DROP TABLE IF EXISTS ${tableName}"""
+
+sql """
+CREATE TABLE `${tableName}` (
+`id` int,
+`name` string
+)
+DUPLICATE KEY(`id`)
+DISTRIBUTED BY HASH(`id`) BUCKETS 1
+PROPERTIES (
+"replication_num" = "1",
+"light_schema_change" = "true"
+);
+""";
+
+    def execute_stream_load_cmd = {label ->
+        def filePath = "${context.config.dataPath}/flink_connector_p0/test_response.csv"
+        StringBuilder strBuilder = new StringBuilder()
+        strBuilder.append("""curl --location-trusted -u """ + context.config.feHttpUser + ":" + context.config.feHttpPassword)
+        strBuilder.append(""" -H two_phase_commit:true """)
+        strBuilder.append(""" -H column_separator:, """)
+        strBuilder.append(""" -H expect:100-continue """)
+        strBuilder.append(""" -H label:""" + label)
+        strBuilder.append(""" -T """ + filePath)
+        strBuilder.append(""" http://""" + context.config.feHttpAddress + """/api/${thisDb}/${tableName}/_stream_load""")
+
+        String command = strBuilder.toString()
+        logger.info("streamload command=" + command)
+        def process = command.toString().execute()
+        process.waitFor()
+        def out = process.getText()
+        println(out)
+        out
+    }
+
+    def execute_commit_cmd = {txnId ->
+        StringBuilder strBuilder = new StringBuilder()
+        strBuilder.append("""curl -X PUT --location-trusted -u """ + context.config.feHttpUser + ":" + context.config.feHttpPassword)
+        strBuilder.append(""" -H txn_id:""" + txnId)
+        strBuilder.append(""" -H txn_operation:commit""")
+        strBuilder.append(""" http://""" + context.config.feHttpAddress + """/api/${thisDb}/${tableName}/_stream_load_2pc""")
+
+        String command = strBuilder.toString()
+        logger.info("streamload command=" + command)
+        def processCommit = command.toString().execute()
+        processCommit.waitFor()
+        def outCommit = processCommit.getText()
+        println(outCommit)
+        outCommit
+    }
+
+def execute_abort_cmd = {txnId ->
+StringBuilder strBuilder = new StringBuilder()
+strBuilde

(doris) 38/44: [fix](parquet) prevent parquet page reader print much warning logs (#37011)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 2f8298e23be8e58d106d599e171f3792fe09e08a
Author: Ashin Gau 
AuthorDate: Mon Jul 1 23:02:41 2024 +0800

[fix](parquet) prevent parquet page reader print much warning logs (#37011)

Prevent the parquet page reader from printing excessive warning logs like:
```
Couldn't deserialize thrift msg:
```
---
 be/src/util/thrift_util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/be/src/util/thrift_util.h b/be/src/util/thrift_util.h
index aff3a9ab101..9f4792ff64b 100644
--- a/be/src/util/thrift_util.h
+++ b/be/src/util/thrift_util.h
@@ -147,7 +147,7 @@ Status deserialize_thrift_msg(const uint8_t* buf, uint32_t* len, bool compact,
     try {
         deserialized_msg->read(tproto.get());
     } catch (std::exception& e) {
-        return Status::InternalError("Couldn't deserialize thrift msg:\n{}", e.what());
+        return Status::InternalError<false>("Couldn't deserialize thrift msg:\n{}", e.what());
     } catch (...) {
 // TODO: Find the right exception for 0 bytes
 return Status::InternalError("Unknown exception");





(doris) 28/44: [fix](map)fix upgrade behavior from 1.2 version (#36937)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit a4001b3a92cd8b3474511014710d91964ebd0f9d
Author: amory 
AuthorDate: Mon Jul 1 16:58:41 2024 +0800

[fix](map)fix upgrade behavior from 1.2 version (#36937)

fix core dump when upgrading from 1.2.x to 2.0.x with map datatype column
---
 be/src/olap/rowset/segment_v2/column_reader.cpp | 1 +
 be/src/olap/rowset/segment_v2/column_writer.cpp | 7 ++-
 be/src/olap/tablet_schema.cpp   | 6 --
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 392917e0d83..d0f2830712d 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -128,6 +128,7 @@ Status ColumnReader::create_map(const ColumnReaderOptions& opts, const ColumnMet
                                 const io::FileReaderSPtr& file_reader,
                                 std::unique_ptr* reader) {
     // map reader now has 3 sub readers for key, value, offsets(scalar), null(scalar)
+    DCHECK(meta.children_columns_size() == 3 || meta.children_columns_size() == 4);
     std::unique_ptr key_reader;
     RETURN_IF_ERROR(ColumnReader::create(opts, meta.children_columns(0),
                                          meta.children_columns(0).num_rows(), file_reader,
diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp 
b/be/src/olap/rowset/segment_v2/column_writer.cpp
index 016a932b276..798cbe08261 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -206,6 +206,11 @@ Status ColumnWriter::create_map_writer(const ColumnWriterOptions& opts, const Ta
                                        io::FileWriter* file_writer,
                                        std::unique_ptr* writer) {
     DCHECK(column->get_subtype_count() == 2);
+    if (column->get_subtype_count() < 2) {
+        return Status::InternalError(
+                "If you upgraded from version 1.2.*, please DROP the MAP columns and then "
+                "ADD the MAP columns back.");
+    }
     // create key & value writer
     std::vector> inner_writer_list;
     for (int i = 0; i < 2; ++i) {
@@ -1141,4 +1146,4 @@ Status MapColumnWriter::write_inverted_index() {
 return Status::OK();
 }
 
-} // namespace doris::segment_v2
\ No newline at end of file
+} // namespace doris::segment_v2
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index cee6f3e2d2e..ec887f14a91 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -553,7 +553,8 @@ void TabletColumn::init_from_pb(const ColumnPB& column) {
 CHECK(column.children_columns_size() == 1) << "ARRAY type has more 
than 1 children types.";
 }
 if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
-CHECK(column.children_columns_size() == 2) << "MAP type has more than 
2 children types.";
+DCHECK(column.children_columns_size() == 2) << "MAP type has more than 
2 children types.";
+LOG(WARNING) << "MAP type has more than 2 children types.";
 }
 for (size_t i = 0; i < column.children_columns_size(); i++) {
 TabletColumn child_column;
@@ -623,7 +624,8 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const {
 CHECK(_sub_columns.size() == 1) << "ARRAY type has more than 1 
children types.";
 }
 if (_type == FieldType::OLAP_FIELD_TYPE_MAP) {
-CHECK(_sub_columns.size() == 2) << "MAP type has more than 2 children 
types.";
+DCHECK(_sub_columns.size() == 2) << "MAP type has more than 2 children 
types.";
+LOG(WARNING) << "MAP type has more than 2 children types.";
 }
 
 for (size_t i = 0; i < _sub_columns.size(); i++) {


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) 43/44: [fix](nereids) fix This stopwatch is already running (#37095)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 597bfebe2de660661b1528f146f92c5ae4ee75aa
Author: meiyi 
AuthorDate: Tue Jul 2 10:01:27 2024 +0800

[fix](nereids) fix This stopwatch is already running (#37095)

If planning throws an exception, the stopwatch is not stopped, and
executing the next SQL statement fails with:

2024-07-01 19:05:01,679 WARN (mysql-nio-pool-21|660) 
[StmtExecutor.execute():601] Analyze failed. stmt[10651860, 
6cea9e53220940ee-b324d18af20d9ab7]
org.apache.doris.common.NereidsException: errCode = 2, detailMessage = This 
stopwatch is already running.
at 
org.apache.doris.qe.StmtExecutor.executeByNereids(StmtExecutor.java:755) 
~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:584) 
~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:537) 
~[doris-fe.jar:1.2-SNAPSHOT]
at 
org.apache.doris.nereids.trees.plans.commands.ExecuteCommand.run(ExecuteCommand.java:82)
 ~[doris-fe.jar:1.2-SNAPSHOT]
at 
org.apache.doris.qe.StmtExecutor.executeByNereids(StmtExecutor.java:727) 
~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:584) 
~[doris-fe.jar:1.2-SNAPSHOT]
at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:537) 
~[doris-fe.jar:1.2-SNAPSHOT]
at 
org.apache.doris.qe.MysqlConnectProcessor.handleExecute(MysqlConnectProcessor.java:201)
 ~[doris-fe.jar:1.2-SNAPSHOT]
at 
org.apache.doris.qe.MysqlConnectProcessor.handleExecute(MysqlConnectProcessor.java:246)
 ~[doris-fe.jar:1.2-SNAPSHOT]
at 
org.apache.doris.qe.MysqlConnectProcessor.dispatch(MysqlConnectProcessor.java:291)
 ~[doris-fe.jar:1.2-SNAPSHOT]
at 
org.apache.doris.qe.MysqlConnectProcessor.processOnce(MysqlConnectProcessor.java:342)
 ~[doris-fe.jar:1.2-SNAPSHOT]
at 
org.apache.doris.mysql.ReadListener.lambda$handleEvent$0(ReadListener.java:52) 
~[doris-fe.jar:1.2-SNAPSHOT]
at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) 
~[?:?]
at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) 
~[?:?]
at java.lang.Thread.run(Thread.java:833) ~[?:?]
Caused by: org.apache.doris.common.AnalysisException: errCode = 2, 
detailMessage = This stopwatch is already running.
... 15 more
Caused by: java.lang.IllegalStateException: This stopwatch is already 
running.
at 
com.google.common.base.Preconditions.checkState(Preconditions.java:512) 
~[guava-32.1.2-jre.jar:?]
at com.google.common.base.Stopwatch.start(Stopwatch.java:166) 
~[guava-32.1.2-jre.jar:?]
at 
org.apache.doris.nereids.NereidsPlanner.plan(NereidsPlanner.java:140) 
~[doris-fe.jar:1.2-SNAPSHOT]
at 
org.apache.doris.nereids.trees.plans.commands.insert.InsertIntoTableCommand.initPlan(InsertIntoTableCommand
.java:155) ~[doris-fe.jar:1.2-SNAPSHOT]
at 
org.apache.doris.nereids.trees.plans.commands.insert.InsertIntoTableCommand.runInternal(InsertIntoTableComm
and.java:226) ~[doris-fe.jar:1.2-SNAPSHOT]
at 
org.apache.doris.nereids.trees.plans.commands.insert.InsertIntoTableCommand.run(InsertIntoTableCommand.java
:105) ~[doris-fe.jar:1.2-SNAPSHOT]
at 
org.apache.doris.qe.StmtExecutor.executeByNereids(StmtExecutor.java:727) 
~[doris-fe.jar:1.2-SNAPSHOT]
... 14 more
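
The fix wraps planning in try/finally so the stopwatch is stopped on every exit path. The equivalent C++ idiom is an RAII guard; a minimal sketch with a hypothetical `Stopwatch` type (not the Guava or Doris class):

```
#include <stdexcept>

struct Stopwatch {
    bool running = false;
    void start() {
        if (running) throw std::logic_error("This stopwatch is already running.");
        running = true;
    }
    void stop() { running = false; }
};

// Stops the watch on every exit path, including exceptions.
class StopwatchGuard {
public:
    explicit StopwatchGuard(Stopwatch& sw) : sw_(sw) { sw_.start(); }
    ~StopwatchGuard() { sw_.stop(); }
private:
    Stopwatch& sw_;
};

void plan_query(Stopwatch& sw) {
    StopwatchGuard guard(sw);             // started here
    throw std::runtime_error("plan failed");
}                                         // destructor still stops the watch
```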
---
 .../src/main/java/org/apache/doris/nereids/NereidsPlanner.java | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
index e0d6f2f7589..97377c7ba26 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java
@@ -138,9 +138,13 @@ public class NereidsPlanner extends Planner {
 
 PhysicalProperties requireProperties = buildInitRequireProperties();
 statementContext.getStopwatch().start();
-boolean showPlanProcess = 
showPlanProcess(queryStmt.getExplainOptions());
-Plan resultPlan = plan(parsedPlan, requireProperties, explainLevel, 
showPlanProcess);
-statementContext.getStopwatch().stop();
+Plan resultPlan = null;
+try {
+boolean showPlanProcess = 
showPlanProcess(queryStmt.getExplainOptions());
+resultPlan = plan(parsedPlan, requireProperties, explainLevel, 
showPlanProcess);
+} finally {
+statementContext.getStopwatch().stop();
+}
 setOptimizedPlan(resultPlan);
 
 if (resultPlan instanceof PhysicalPlan) {


--

(doris) 37/44: [test](migrate) move test_hive_text_complex_type from p2 to p0 (#37007)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 538a885697cb559f14d0e04261984251f1ab2cd6
Author: Ashin Gau 
AuthorDate: Mon Jul 1 23:02:25 2024 +0800

[test](migrate) move test_hive_text_complex_type from p2 to p0 (#37007)

follow up: #36787
move test_hive_text_complex_type from p2 to p0
---
 .../hive_text_complex_type/create_table.hql|  27 +
 .../hive_text_complex_type/data.tar.gz | Bin 0 -> 560 bytes
 .../multi_catalog/hive_text_complex_type/run.sh|  12 
 .../hive_text_complex_type2/create_table.hql   |  21 +++
 .../hive_text_complex_type2/data.tar.gz| Bin 0 -> 366 bytes
 .../multi_catalog/hive_text_complex_type2/run.sh   |  12 
 .../hive_text_complex_type3/create_table.hql   |  24 
 .../hive_text_complex_type3/data.tar.gz| Bin 0 -> 977 bytes
 .../multi_catalog/hive_text_complex_type3/run.sh   |  12 
 .../create_table.hql   |  33 +++
 .../hive_text_complex_type_delimiter/data.tar.gz   | Bin 0 -> 568 bytes
 .../hive_text_complex_type_delimiter/run.sh|  12 
 .../create_table.hql   |  27 +
 .../hive_text_complex_type_delimiter2/data.tar.gz  | Bin 0 -> 376 bytes
 .../hive_text_complex_type_delimiter2/run.sh   |  12 
 .../create_table.hql   |  26 +
 .../hive_text_complex_type_delimiter3/data.tar.gz  | Bin 0 -> 978 bytes
 .../hive_text_complex_type_delimiter3/run.sh   |  12 
 .../parquet_predicate_table/create_table.hql   |  18 ++
 .../parquet_predicate_table/data.tar.gz| Bin 0 -> 828 bytes
 .../multi_catalog/parquet_predicate_table/run.sh   |  12 
 .../hive/test_hive_text_complex_type.out   |  65 +
 .../hive/test_hive_text_complex_type.groovy|  15 +++--
 23 files changed, 334 insertions(+), 6 deletions(-)

diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/create_table.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/create_table.hql
new file mode 100644
index 000..3b20db98019
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/create_table.hql
@@ -0,0 +1,27 @@
+CREATE DATABASE IF NOT EXISTS multi_catalog;
+USE multi_catalog;
+
+CREATE TABLE `multi_catalog.hive_text_complex_type`(
+  `column1` int, 
+  `column2` map, 
+  `column3` map, 
+  `column4` map, 
+  `column5` map, 
+  `column6` map, 
+  `column7` map, 
+  `column8` map, 
+  `column9` map, 
+  `column10` map, 
+  `column11` map, 
+  `column12` 
struct)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION '/user/doris/suites/multi_catalog/hive_text_complex_type'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1690518015');
+
+msck repair table hive_text_complex_type;
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/data.tar.gz
 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/data.tar.gz
new file mode 100644
index 000..dd8a3c6b068
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/data.tar.gz
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/run.sh
 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/run.sh
new file mode 100644
index 000..f3136eaa200
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/run.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type2/create_table.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type2/create_table.hql
new file mode 100644
index 000..ac75375d950
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type2/create_table.hql
@@ -0,0 +1,21 @@
+CREATE DATABASE IF NOT EXISTS multi_catalog;
+USE multi_catalog;
+
+CREATE TABLE `multi_catalog.hive_text_complex_type2`(
+  `id` int, 
+ 

(doris) 42/44: [Fix]add set thread num config for wg flush pool (#37028)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 2206acececbbb37bf83d832de221d5e0508708e1
Author: wangbo 
AuthorDate: Tue Jul 2 09:58:57 2024 +0800

[Fix]add set thread num config for wg flush pool (#37028)

## Proposed changes
Calculate the workload group's memtable flush pool thread num from the CPU
and disk counts; otherwise memtable flush may consume more memory than when
workload groups are not enabled.
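
A sketch of the sizing rule described above. The parameter names mirror the new config options, but combining the two budgets with min() and the floor of one thread are assumptions for illustration, not the exact formula in the patch:

```
#include <algorithm>

// Hypothetical helper: cap the workload-group flush pool by both a per-disk
// and a per-CPU budget so that enabling workload groups cannot spawn more
// flush threads (and more in-flight memtables) than the global flush pool.
int wg_flush_pool_threads(int disk_num, int cpu_num,
                          int threads_per_store,  // wg_flush_thread_num_per_store
                          int threads_per_cpu) {  // wg_flush_thread_num_per_cpu
    return std::max(1, std::min(threads_per_store * disk_num,
                                threads_per_cpu * cpu_num));
}
```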
---
 be/src/common/config.cpp |  3 ++
 be/src/common/config.h   |  4 ++
 be/src/olap/delta_writer_v2.cpp  |  2 +-
 be/src/olap/storage_engine.cpp   |  3 +-
 be/src/olap/storage_engine.h |  4 ++
 be/src/runtime/query_context.cpp |  6 +--
 be/src/runtime/query_context.h   |  4 +-
 be/src/runtime/workload_group/workload_group.cpp | 53 
 be/src/runtime/workload_group/workload_group.h   |  4 +-
 be/src/vec/sink/writer/async_result_writer.cpp   | 27 
 10 files changed, 64 insertions(+), 46 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 7166b39dda8..78afc756af8 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -691,6 +691,9 @@ DEFINE_Int32(high_priority_flush_thread_num_per_store, "6");
 // max_flush_thread_num_per_cpu * num_cpu)
 DEFINE_Int32(max_flush_thread_num_per_cpu, "4");
 
+DEFINE_mInt32(wg_flush_thread_num_per_store, "6");
+DEFINE_mInt32(wg_flush_thread_num_per_cpu, "4");
+
 // config for tablet meta checkpoint
 DEFINE_mInt32(tablet_meta_checkpoint_min_new_rowsets_num, "10");
 DEFINE_mInt32(tablet_meta_checkpoint_min_interval_secs, "600");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index dbb5b716b78..1a9e3291db5 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -743,6 +743,10 @@ DECLARE_Int32(high_priority_flush_thread_num_per_store);
 // max_flush_thread_num_per_cpu * num_cpu)
 DECLARE_Int32(max_flush_thread_num_per_cpu);
 
+// workload group flush pool params
+DECLARE_mInt32(wg_flush_thread_num_per_store);
+DECLARE_mInt32(wg_flush_thread_num_per_cpu);
+
 // config for tablet meta checkpoint
 DECLARE_mInt32(tablet_meta_checkpoint_min_new_rowsets_num);
 DECLARE_mInt32(tablet_meta_checkpoint_min_interval_secs);
diff --git a/be/src/olap/delta_writer_v2.cpp b/be/src/olap/delta_writer_v2.cpp
index 80978280b92..3f2f7bf99fa 100644
--- a/be/src/olap/delta_writer_v2.cpp
+++ b/be/src/olap/delta_writer_v2.cpp
@@ -128,7 +128,7 @@ Status DeltaWriterV2::init() {
 RETURN_IF_ERROR(_rowset_writer->init(context));
 ThreadPool* wg_thread_pool_ptr = nullptr;
 if (_state->get_query_ctx()) {
-wg_thread_pool_ptr = 
_state->get_query_ctx()->get_non_pipe_exec_thread_pool();
+wg_thread_pool_ptr = 
_state->get_query_ctx()->get_memtable_flush_pool();
 }
 RETURN_IF_ERROR(_memtable_writer->init(_rowset_writer, _tablet_schema, 
_partial_update_info,
wg_thread_pool_ptr,
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 5d50bb5f4df..90093241ad2 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -233,8 +233,9 @@ Status StorageEngine::_open() {
 auto dirs = get_stores();
 RETURN_IF_ERROR(load_data_dirs(dirs));
 
+_disk_num = dirs.size();
 _memtable_flush_executor = std::make_unique<MemTableFlushExecutor>();
-_memtable_flush_executor->init(dirs.size());
+_memtable_flush_executor->init(_disk_num);
 
 _calc_delete_bitmap_executor = std::make_unique<CalcDeleteBitmapExecutor>();
 _calc_delete_bitmap_executor->init();
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index 94cf142a8c1..5ddd888db6d 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -131,6 +131,8 @@ public:
 
 int64_t memory_limitation_bytes_per_thread_for_schema_change() const;
 
+int get_disk_num() { return _disk_num; }
+
 protected:
 void _evict_querying_rowset();
 void _evict_quring_rowset_thread_callback();
@@ -153,6 +155,8 @@ protected:
 scoped_refptr<Thread> _evict_quering_rowset_thread;
 
 int64_t _memory_limitation_bytes_for_schema_change;
+
+int _disk_num {-1};
 };
 
 class StorageEngine final : public BaseStorageEngine {
diff --git a/be/src/runtime/query_context.cpp b/be/src/runtime/query_context.cpp
index 18d565dcfef..dd7cf4f55b8 100644
--- a/be/src/runtime/query_context.cpp
+++ b/be/src/runtime/query_context.cpp
@@ -326,9 +326,9 @@ doris::pipeline::TaskScheduler* 
QueryContext::get_pipe_exec_scheduler() {
 return _exec_env->pipeline_task_scheduler();
 }
 
-ThreadPool* QueryContext::get_non_pipe_exec_thread_pool() {
+ThreadPool* QueryContext::get_memtable_flush_pool() {
 if (_workload_group) {
-re

(doris) 44/44: [fix](recycler) Fix DCHECK in ObjectListIterator (#37112)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 9e4ca47e1713d024be102776d24a43c4655c30be
Author: plat1ko 
AuthorDate: Tue Jul 2 10:13:22 2024 +0800

[fix](recycler) Fix DCHECK in ObjectListIterator (#37112)

Fix DCHECK in ObjectListIterator
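
The old form `DCHECK(!has_more_ || blobs.empty())` asserts "no more pages, or this page is empty", which rejects the normal case of a non-final, non-empty page. The intended invariant is "a page that reports more results must itself be non-empty", i.e. the De Morgan dual. A minimal sketch of the two predicates:

```
#include <cassert>

// Invariant: if the listing reports more pages, the current page is non-empty.
void check_page(bool has_more, bool page_empty) {
    // Old, wrong: fires on (has_more=true, page_empty=false),
    // i.e. on every ordinary non-final page.
    // assert(!has_more || page_empty);

    // Fixed: fires only on the truly inconsistent (true, true) case.
    assert(!(has_more && page_empty));
}
```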
---
 cloud/src/recycler/azure_obj_client.cpp | 2 +-
 cloud/src/recycler/s3_obj_client.cpp| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cloud/src/recycler/azure_obj_client.cpp 
b/cloud/src/recycler/azure_obj_client.cpp
index 60cd79abb95..62386808d08 100644
--- a/cloud/src/recycler/azure_obj_client.cpp
+++ b/cloud/src/recycler/azure_obj_client.cpp
@@ -83,7 +83,7 @@ public:
 try {
 auto resp = client_->ListBlobs(req_);
 has_more_ = resp.NextPageToken.HasValue();
-DCHECK(!has_more_ || resp.Blobs.empty()) << has_more_ << ' ' << 
resp.Blobs.empty();
+DCHECK(!(has_more_ && resp.Blobs.empty())) << has_more_ << ' ' << 
resp.Blobs.empty();
 req_.ContinuationToken = std::move(resp.NextPageToken);
 results_.reserve(resp.Blobs.size());
 for (auto&& item : std::ranges::reverse_view(resp.Blobs)) {
diff --git a/cloud/src/recycler/s3_obj_client.cpp 
b/cloud/src/recycler/s3_obj_client.cpp
index e95a7409475..23e55711096 100644
--- a/cloud/src/recycler/s3_obj_client.cpp
+++ b/cloud/src/recycler/s3_obj_client.cpp
@@ -77,7 +77,7 @@ public:
 
const_cast(outcome.GetResult().GetNextContinuationToken(;
 
 auto&& content = outcome.GetResult().GetContents();
-DCHECK(!has_more_ || content.empty()) << has_more_ << ' ' << 
content.empty();
+DCHECK(!(has_more_ && content.empty())) << has_more_ << ' ' << 
content.empty();
 
 results_.reserve(content.size());
 for (auto&& obj : std::ranges::reverse_view(content)) {


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) 32/44: [fix](auth)fix mtmv name to resolve conflicts (#36958)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 5f262d610a3710efc6af7e49c70ead7aeef20be6
Author: zfr95 <87513668+zfr9...@users.noreply.github.com>
AuthorDate: Mon Jul 1 20:26:52 2024 +0800

[fix](auth)fix mtmv name to resolve conflicts (#36958)

## Proposed changes

[fix](auth)fix mtmv name to resolve conflicts
---
 .../test_master_slave_consistency_auth.groovy  | 49 --
 .../suites/auth_p0/test_select_column_auth.groovy  | 33 ---
 2 files changed, 46 insertions(+), 36 deletions(-)

diff --git 
a/regression-test/suites/auth_p0/test_master_slave_consistency_auth.groovy 
b/regression-test/suites/auth_p0/test_master_slave_consistency_auth.groovy
index 379ea68f3ce..9e5239f6434 100644
--- a/regression-test/suites/auth_p0/test_master_slave_consistency_auth.groovy
+++ b/regression-test/suites/auth_p0/test_master_slave_consistency_auth.groovy
@@ -34,11 +34,16 @@ suite ("test_follower_consistent_auth","p0,auth") {
 
 String user = 'test_follower_consistent_user'
 String pwd = 'C123_567p'
-String dbName = 'test_select_column_auth_db'
-String tableName = 'test_select_column_auth_table'
-String role = 'test_select_column_auth_role'
-String wg = 'test_select_column_auth_wg'
-String rg = 'test_select_column_auth_rg'
+String dbName = 'test_follower_consistent_db'
+String tableName = 'test_follower_consistent_table'
+String role = 'test_follower_consistent_role'
+String wg = 'test_follower_consistent_wg'
+String rg = 'test_follower_consistent_rg'
+String mv_name = 'test_follower_consistent_mv'
+String mtmv_name = 'test_follower_consistent_mtmv'
+String view_name = 'test_follower_consistent_view'
+String rollup_name = 'test_follower_consistent_rollup'
+String catalog_name = 'test_follower_consistent_catalog'
 try_sql("DROP role ${role}")
 sql """CREATE ROLE ${role}"""
 sql """drop WORKLOAD GROUP if exists '${wg}'"""
@@ -76,12 +81,12 @@ suite ("test_follower_consistent_auth","p0,auth") {
 );
 """
 
-sql """create view ${dbName}.v1 as select * from 
${dbName}.${tableName};"""
-sql """alter table ${dbName}.${tableName} add rollup 
rollup1(username)"""
+sql """create view ${dbName}.${view_name} as select * from 
${dbName}.${tableName};"""
+sql """alter table ${dbName}.${tableName} add rollup 
${rollup_name}(username)"""
 sleep(5 * 1000)
-sql """create materialized view mv1 as select username from 
${dbName}.${tableName}"""
+sql """create materialized view ${mv_name} as select username from 
${dbName}.${tableName}"""
 sleep(5 * 1000)
-sql """CREATE MATERIALIZED VIEW ${dbName}.mtmv1 
+sql """CREATE MATERIALIZED VIEW ${dbName}.${mtmv_name} 
 BUILD IMMEDIATE REFRESH AUTO ON MANUAL 
 DISTRIBUTED BY RANDOM BUCKETS 1 
 PROPERTIES ('replication_num' = '1') 
@@ -93,13 +98,13 @@ suite ("test_follower_consistent_auth","p0,auth") {
 (3, "333");
 """
 sql """CREATE USER '${user}' IDENTIFIED BY '${pwd}'"""
-sql """refresh MATERIALIZED VIEW ${dbName}.mtmv1 auto"""
+sql """refresh MATERIALIZED VIEW ${dbName}.${mtmv_name} auto"""
 sql """grant select_priv on regression_test to ${user}"""
 
 
 connect(user=user, password="${pwd}", url=context.config.jdbcUrl) {
 try {
-sql "SHOW CATALOG RECYCLE BIN WHERE NAME = 'test'"
+sql "SHOW CATALOG RECYCLE BIN WHERE NAME = '${catalog_name}'"
 } catch (Exception e) {
 log.info(e.getMessage())
 assertTrue(e.getMessage().contains("Admin_priv"))
@@ -107,7 +112,7 @@ suite ("test_follower_consistent_auth","p0,auth") {
 }
 connect(user=user, password="${pwd}", url=new_jdbc_url) {
 try {
-sql "SHOW CATALOG RECYCLE BIN WHERE NAME = 'test'"
+sql "SHOW CATALOG RECYCLE BIN WHERE NAME = '${catalog_name}'"
 } catch (Exception e) {
 log.info(e.getMessage())
 assertTrue(e.getMessage().contains("Admin_priv"))
@@ -157,7 +162,7 @@ suite ("test_follower_consistent_auth","p0,auth") {
 
 connect(user=user, password="${pwd}", url=context.config.jdbcUrl) {
 try {
-sql "select username from ${dbName}.v1"
+sql "select username from ${dbName}.${view_name}"
 } catch (Exception e) {
 log.info(e.getMessage())
 assertTrue(e.getMessage().contains("Admin_priv,Select_priv"))
@@ -165,24 +170,24 @@ suite ("test_follower_consistent_auth","p0,auth") {
 }
 connect(user=user, password="${pwd}", url=new_jdbc_url) {
 t

(doris) 41/44: [fix](merge-on-write) when full clone failed, duplicate key might occur (#37001)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 1ef9ab1453f80ac849589bb322c00e8e3e602cff
Author: zhannngchen <48427519+zhannngc...@users.noreply.github.com>
AuthorDate: Tue Jul 2 09:53:49 2024 +0800

[fix](merge-on-write) when full clone failed, duplicate key might occur 
(#37001)

## Proposed changes

Issue Number: close #xxx

introduced by #31268

A full clone failure may produce duplicate keys in a MOW table.
The bug is triggered under the following conditions:
1. replica 0 misses versions
2. replica 0 tries to do a full clone from other replicas
3. the full clone fails and the delete bitmap is overwritten incorrectly
4. replica 0 tries an incremental clone again, and this time the clone
succeeds
5. the incremental clone can't fix the delete bitmap overwritten by the
previous failed full clone
6. duplicate keys occur

Solution:
for a full clone, don't overwrite the delete bitmap; use the `merge()`
method instead.
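
A minimal sketch of why `merge()` is safer than assignment here, using `std::map` as a stand-in for the real delete bitmap (the actual Doris `DeleteBitmap` keys on rowset/segment/version; this is an assumption-level simplification):

```
#include <cstdint>
#include <map>

using DeleteBitmap = std::map<int64_t /*version*/, uint64_t /*marked rows*/>;

// Assignment drops marks the local replica already holds; if the clone later
// fails, those marks are gone and duplicate keys can surface.
void apply_full_clone_bad(DeleteBitmap& local, const DeleteBitmap& cloned) {
    local = cloned;
}

// Merging keeps existing marks and adds the cloned ones, so a failed full
// clone cannot erase delete marks that a later incremental clone relies on.
void apply_full_clone_good(DeleteBitmap& local, const DeleteBitmap& cloned) {
    local.insert(cloned.begin(), cloned.end());  // union; existing keys kept
}
```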
---
 be/src/olap/snapshot_manager.cpp   |   9 ++
 be/src/olap/tablet.cpp |   8 ++
 be/src/olap/task/engine_clone_task.cpp |   2 +-
 .../test_mow_full_clone_exception.out  |  37 ++
 .../test_mow_full_clone_exception.groovy   | 137 +
 5 files changed, 192 insertions(+), 1 deletion(-)

diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index eab49997599..e2f117b54e2 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -451,6 +451,15 @@ Status SnapshotManager::_create_snapshot_files(const 
TabletSharedPtr& ref_tablet
 }
 }
 
+
DBUG_EXECUTE_IF("SnapshotManager.create_snapshot_files.allow_inc_clone", {
+auto tablet_id = dp->param("tablet_id", 0);
+auto is_full_clone = dp->param("is_full_clone", false);
+if (ref_tablet->tablet_id() == tablet_id && is_full_clone) {
+LOG(INFO) << "injected full clone for tabelt: " << 
tablet_id;
+res = Status::InternalError("fault injection error");
+}
+});
+
 // be would definitely set it as true no matter has missed version 
or not, we could
 // just check whether the missed version is empty or not
 int64_t version = -1;
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index d49d56ef2d3..8b6ebc2c395 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -404,6 +404,14 @@ Status Tablet::revise_tablet_meta(const 
std::vector& to_add,
 break; // while (keys_type() == UNIQUE_KEYS && 
enable_unique_key_merge_on_write())
 }
 
+DBUG_EXECUTE_IF("Tablet.revise_tablet_meta_fail", {
+auto ptablet_id = dp->param("tablet_id", 0);
+if (tablet_id() == ptablet_id) {
+LOG(INFO) << "injected revies_tablet_meta failure for tabelt: " << 
ptablet_id;
+calc_bm_status = Status::InternalError("fault injection error");
+}
+});
+
 // error handling
 if (!calc_bm_status.ok()) {
 if (is_incremental_clone) {
diff --git a/be/src/olap/task/engine_clone_task.cpp 
b/be/src/olap/task/engine_clone_task.cpp
index 590fdd1b4a7..a05a640dcfe 100644
--- a/be/src/olap/task/engine_clone_task.cpp
+++ b/be/src/olap/task/engine_clone_task.cpp
@@ -875,7 +875,7 @@ Status EngineCloneTask::_finish_full_clone(Tablet* tablet,
 }
 }
 if (tablet->enable_unique_key_merge_on_write()) {
-tablet->tablet_meta()->delete_bitmap() = 
cloned_tablet_meta->delete_bitmap();
+
tablet->tablet_meta()->delete_bitmap().merge(cloned_tablet_meta->delete_bitmap());
 }
 return tablet->revise_tablet_meta(to_add, to_delete, false);
 // TODO(plat1ko): write cooldown meta to remote if this replica is 
cooldown replica
diff --git 
a/regression-test/data/unique_with_mow_p0/test_mow_full_clone_exception.out 
b/regression-test/data/unique_with_mow_p0/test_mow_full_clone_exception.out
new file mode 100644
index 000..f11c60b41e1
--- /dev/null
+++ b/regression-test/data/unique_with_mow_p0/test_mow_full_clone_exception.out
@@ -0,0 +1,37 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1  10
+2  200
+3  30
+4  400
+5  500
+6  600
+7  7
+8  8
+9  9
+10 10
+
+-- !sql --
+1  10
+2  200
+3  30
+4  400
+5  500
+6  600
+7  7
+8  8
+9  9
+10 10
+
+-- !sql --
+1  10
+2  200
+3  30
+4  400
+5  500
+6  600
+7  7
+8  8
+9  9
+10 10
+
diff --git 
a/regression-test/suites/unique_with_mow_p0/test_mow_full_clone_exception.groovy
 
b/regression-test/suites/unique_with_mow_p0/test_mow_fu

(doris) 18/44: [Migrate-Test](multi-catalog) Migrate p2 tests from p2 to p0. (#36989)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 19774867822dd9c58d9422b0b186a3eed4470d4a
Author: Qi Chen 
AuthorDate: Mon Jul 1 14:45:50 2024 +0800

[Migrate-Test](multi-catalog) Migrate p2 tests from p2 to p0. (#36989)

## Proposed changes

[Migrate-Test] (multi-catalog) Migrate p2 tests from p2 to p0.
- Migrate tests from p2 to p0.
- Set the health check of the HMS docker to 10s (interval) * 120 (retries).
- Remove duplicated tables in `create_preinstalled_table.hql` by adding
new scripts.
---
 .../docker-compose/hive/hive-2x.yaml.tpl   |   2 +-
 .../docker-compose/hive/hive-3x.yaml.tpl   |   2 +-
 .../hive/scripts/create_preinstalled_table.hql | 108 
 .../data/default/account_fund/create_table.hql |  28 +
 .../scripts/data/default/account_fund/data.tar.gz  | Bin 0 -> 234 bytes
 .../hive/scripts/data/default/account_fund/run.sh  |  12 ++
 .../scripts/data/default/hive01/create_table.hql   |  22 
 .../hive/scripts/data/default/hive01/data.tar.gz   | Bin 0 -> 186 bytes
 .../hive/scripts/data/default/hive01/run.sh|  12 ++
 .../data/default/sale_table/create_table.hql   |  24 
 .../scripts/data/default/sale_table/data.tar.gz| Bin 0 -> 221 bytes
 .../hive/scripts/data/default/sale_table/run.sh|  12 ++
 .../data/default/string_table/create_table.hql |  27 
 .../scripts/data/default/string_table/data.tar.gz  | Bin 0 -> 260 bytes
 .../hive/scripts/data/default/string_table/run.sh  |  12 ++
 .../scripts/data/default/student/create_table.hql  |  24 
 .../hive/scripts/data/default/student/data.tar.gz  | Bin 0 -> 210 bytes
 .../hive/scripts/data/default/student/run.sh   |  12 ++
 .../scripts/data/default/test1/create_table.hql|  23 
 .../hive/scripts/data/default/test1/data.tar.gz| Bin 0 -> 211 bytes
 .../hive/scripts/data/default/test1/run.sh |  12 ++
 .../scripts/data/default/test2/create_table.hql|  23 
 .../hive/scripts/data/default/test2/data.tar.gz| Bin 0 -> 197 bytes
 .../hive/scripts/data/default/test2/run.sh |  12 ++
 .../data/default/test_hive_doris/create_table.hql  |  20 +++
 .../data/default/test_hive_doris/data.tar.gz   | Bin 0 -> 181 bytes
 .../scripts/data/default/test_hive_doris/run.sh|  12 ++
 .../par_fields_in_file_orc/create_table.hql|  21 
 .../par_fields_in_file_orc/data.tar.gz | Bin 0 -> 751 bytes
 .../multi_catalog/par_fields_in_file_orc/run.sh|  12 ++
 .../par_fields_in_file_parquet/create_table.hql|  21 
 .../par_fields_in_file_parquet/data.tar.gz | Bin 0 -> 548 bytes
 .../par_fields_in_file_parquet/run.sh  |  12 ++
 .../partition_location_1/create_table.hql  |  22 
 .../multi_catalog/partition_location_1/data.tar.gz | Bin 0 -> 583 bytes
 .../data/multi_catalog/partition_location_1/run.sh |  12 ++
 .../partition_location_2/create_table.hql  |  23 
 .../multi_catalog/partition_location_2/data.tar.gz | Bin 0 -> 600 bytes
 .../data/multi_catalog/partition_location_2/run.sh |  12 ++
 .../timestamp_with_time_zone/create_table.hql  |  17 +++
 .../timestamp_with_time_zone/data.tar.gz   | Bin 0 -> 1499 bytes
 .../multi_catalog/timestamp_with_time_zone/run.sh  |  12 ++
 .../scripts/data/test/hive_test/create_table.hql   |  20 +++
 .../hive/scripts/data/test/hive_test/data.tar.gz   | Bin 0 -> 161 bytes
 .../hive/scripts/data/test/hive_test/run.sh|  12 ++
 .../hive/test_external_catalog_hive.out| 139 +++--
 .../hive/test_hive_partition_location.out  |  40 ++
 .../hive/test_external_catalog_hive.groovy |  99 ---
 .../hive/test_hive_partition_location.groovy   |  18 +--
 49 files changed, 692 insertions(+), 199 deletions(-)

diff --git a/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl 
b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
index ca0fe2e9ddb..0aec9ec2365 100644
--- a/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
+++ b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
@@ -89,7 +89,7 @@ services:
   - hive-metastore-postgresql
 healthcheck:
   test: ["CMD", "sh", "-c", "/mnt/scripts/healthy_check.sh"]
-  interval: 5s
+  interval: 10s
   timeout: 60s
   retries: 120
 network_mode: "host"
diff --git a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl 
b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
index 09d150c17b2..901e5b3f71a 100644
--- a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
+++ b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
@@ -89,7 +89,7 @@ services:
   - hive-metastore-postgresql
 healthcheck:
   test: ["CMD", "sh", "-c", "/mnt/scripts/healthy_check.sh"]
-  interval: 5s
+  interval: 10s
   tim

(doris) branch master updated (2e63fefabd4 -> 9223349ee0e)

2024-07-02 Thread morrysnow
This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


from 2e63fefabd4 [opt](ctas) add a variable to control varchar length in 
ctas (#37069)
 add 9223349ee0e [enhance](mtmv)support replace materialized view (#36749)

No new revisions were added by this update.

Summary of changes:
 .../antlr4/org/apache/doris/nereids/DorisParser.g4 |   1 +
 .../main/java/org/apache/doris/alter/Alter.java|   8 ++
 .../doris/nereids/parser/LogicalPlanBuilder.java   |   6 +
 .../trees/plans/commands/info/AlterMTMVInfo.java   |  10 ++
 .../plans/commands/info/AlterMTMVRenameInfo.java   |   1 +
 .../plans/commands/info/AlterMTMVReplaceInfo.java  | 101 ++
 .../data/mtmv_p0/test_multi_level_rename_mtmv.out  |   4 +
 .../data/mtmv_p0/test_multi_level_replace_mtmv.out |   4 +
 regression-test/data/mtmv_p0/test_replace_mtmv.out |  25 
 .../mtmv_p0/test_multi_level_rename_mtmv.groovy|  88 
 .../mtmv_p0/test_multi_level_replace_mtmv.groovy   |  98 +
 .../suites/mtmv_p0/test_replace_mtmv.groovy| 155 +
 12 files changed, 501 insertions(+)
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterMTMVReplaceInfo.java
 create mode 100644 
regression-test/data/mtmv_p0/test_multi_level_rename_mtmv.out
 create mode 100644 
regression-test/data/mtmv_p0/test_multi_level_replace_mtmv.out
 create mode 100644 regression-test/data/mtmv_p0/test_replace_mtmv.out
 create mode 100644 
regression-test/suites/mtmv_p0/test_multi_level_rename_mtmv.groovy
 create mode 100644 
regression-test/suites/mtmv_p0/test_multi_level_replace_mtmv.groovy
 create mode 100644 regression-test/suites/mtmv_p0/test_replace_mtmv.groovy


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch master updated: [feature](ES Catalog) map nested/object type in ES to JSON type in Doris (#37101)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new 6e192d284b1 [feature](ES Catalog) map nested/object type in ES to JSON 
type in Doris (#37101)
6e192d284b1 is described below

commit 6e192d284b17d306644a224d87892b5001f7b85f
Author: qiye 
AuthorDate: Tue Jul 2 17:19:14 2024 +0800

[feature](ES Catalog) map nested/object type in ES to JSON type in Doris 
(#37101)

1. `nested`/`object` can map to `json` type in Doris, and can be
analyzed with json functions.
2. Add some cases for `json_extract`.
---
 be/src/exec/es/es_scroll_parser.cpp|   7 +
 .../elasticsearch/scripts/data/data1.json  |   6 +-
 .../elasticsearch/scripts/data/data1_es6.json  |   4 +
 .../elasticsearch/scripts/data/data2.json  |   6 +-
 .../elasticsearch/scripts/data/data2_es6.json  |   6 +-
 .../elasticsearch/scripts/data/data3.json  |   6 +-
 .../elasticsearch/scripts/data/data3_es5.json  |   6 +-
 .../elasticsearch/scripts/data/data3_es6.json  |   6 +-
 .../elasticsearch/scripts/data/data4.json  |   4 +
 .../elasticsearch/scripts/index/array_meta.json|   5 +-
 .../elasticsearch/scripts/index/es6_hide.json  |   3 +
 .../elasticsearch/scripts/index/es6_test1.json |   3 +
 .../elasticsearch/scripts/index/es6_test2.json |   3 +
 .../elasticsearch/scripts/index/es7_hide.json  |   3 +
 .../elasticsearch/scripts/index/es7_test1.json |   3 +
 .../elasticsearch/scripts/index/es7_test2.json |   3 +
 .../org/apache/doris/datasource/es/EsUtil.java |   8 +-
 .../data/external_table_p0/es/test_es_query.out| 244 -
 .../es/test_es_query_no_http_url.out   |   6 +-
 regression-test/data/mtmv_p0/test_es_mtmv.out  |  16 +-
 .../external_table_p0/es/test_es_query.groovy  |  34 +--
 21 files changed, 241 insertions(+), 141 deletions(-)

diff --git a/be/src/exec/es/es_scroll_parser.cpp 
b/be/src/exec/es/es_scroll_parser.cpp
index a1c3c9f0d5e..f3c8dc57eba 100644
--- a/be/src/exec/es/es_scroll_parser.cpp
+++ b/be/src/exec/es/es_scroll_parser.cpp
@@ -40,6 +40,7 @@
 #include "runtime/decimalv2_value.h"
 #include "runtime/define_primitive_type.h"
 #include "runtime/descriptors.h"
+#include "runtime/jsonb_value.h"
 #include "runtime/primitive_type.h"
 #include "runtime/types.h"
 #include "util/binary_cast.hpp"
@@ -799,6 +800,12 @@ Status ScrollParser::fill_columns(const TupleDescriptor* 
tuple_desc,
 col_ptr->insert(array);
 break;
 }
+case TYPE_JSONB: {
+JsonBinaryValue binary_val(json_value_to_string(col));
+vectorized::JsonbField json(binary_val.value(), binary_val.size());
+col_ptr->insert(json);
+break;
+}
 default: {
 LOG(ERROR) << "Unsupported data type: " << type_to_string(type);
 DCHECK(false);
diff --git 
a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1.json 
b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1.json
index 7eb7f9c94f5..b5b57ed281e 100755
--- a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1.json
+++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1.json
@@ -29,5 +29,9 @@
 {"name": "Tim", "age": 28}
   ],
   "my_wildcard": "This string can be quite lengthy",
-  "level": "debug"
+  "level": "debug",
+  "c_user": [
+{"first": "John", "last":  "Smith"},
+{"first": "Alice", "last":  "White"}
+  ]
 }
diff --git 
a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1_es6.json 
b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1_es6.json
index 68e52e46dbb..8a6b404f8e6 100755
--- 
a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1_es6.json
+++ 
b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data1_es6.json
@@ -23,5 +23,9 @@
   "c_person": [
 {"name": "Andy", "age": 18},
 {"name": "Tim", "age": 28}
+  ],
+  "c_user": [
+{"first": "John", "last":  "Smith"},
+{"first": "Alice", "last":  "White"}
   ]
 }
diff --git 
a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2.json 
b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2.json
index 792200fcdec..9ab26a5684f 100755
--- a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2.json
+++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2.json
@@ -28,5 +28,9 @@
 {"name": "Andy", "age": 18},
 {"name": "Tim", "age": 28}
   ],
-  "message": ""
+  "message": "",
+  "c_user": [
+{"first": "John", "last":  "Smith"},
+{"first": "Alice", "last":  "White"}
+  ]
 }
diff --git 
a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/data2_es6.json 
b/docker/thirdparties/docker

(doris) branch master updated: [feat](Nereids) Add support for slot pruning in functional dependencies (#37045)

2024-07-02 Thread morrysnow
This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new 40dbb5998df [feat](Nereids) Add support for slot pruning in functional 
dependencies (#37045)
40dbb5998df is described below

commit 40dbb5998df09bc002c8bd8f4c56c86d8c2c90ae
Author: 谢健 
AuthorDate: Tue Jul 2 17:21:20 2024 +0800

[feat](Nereids) Add support for slot pruning in functional dependencies 
(#37045)

Implement slot pruning functionality for functional dependencies to
optimize performance and resource utilization. This enhancement allows
for more efficient handling of dependencies by removing unnecessary
slots.
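
The core operation: given the slots that survive a projection, keep only dependencies whose determinant and dependent sets are fully contained in them. The real change also walks the dependency graph transitively; this C++ sketch with hypothetical types shows just the direct filter:

```
#include <algorithm>
#include <set>
#include <vector>

using SlotSet = std::set<int>;  // slot ids
struct FuncDep { SlotSet determinants, dependencies; };

bool contains(const SlotSet& outer, const SlotSet& inner) {
    return std::includes(outer.begin(), outer.end(), inner.begin(), inner.end());
}

// Keep a dependency only if every slot it mentions is still projected out.
std::vector<FuncDep> prune(const std::vector<FuncDep>& deps, const SlotSet& valid) {
    std::vector<FuncDep> kept;
    for (const auto& d : deps) {
        if (contains(valid, d.determinants) && contains(valid, d.dependencies)) {
            kept.push_back(d);
        }
    }
    return kept;
}
```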
---
 .../apache/doris/nereids/properties/DataTrait.java |  2 +
 .../apache/doris/nereids/properties/FuncDeps.java  |  6 ++-
 .../doris/nereids/properties/FuncDepsDG.java   | 47 +
 .../rewrite/PushDownAggThroughJoinOnPkFk.java  |  7 +--
 .../nereids/trees/plans/logical/LogicalPlan.java   |  7 +++
 .../trees/plans/logical/LogicalProject.java|  3 --
 .../doris/nereids/util/ImmutableEqualSet.java  | 23 +
 .../doris/nereids/properties/FuncDepsDGTest.java   | 43 
 .../doris/nereids/util/ImmutableEqualSetTest.java  | 59 ++
 9 files changed, 190 insertions(+), 7 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/DataTrait.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/DataTrait.java
index 3a74f58b328..1d5210a1e6a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/DataTrait.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/DataTrait.java
@@ -317,6 +317,8 @@ public class DataTrait {
 public void pruneSlots(Set<Slot> outputSlots) {
 uniformSet.removeNotContain(outputSlots);
 uniqueSet.removeNotContain(outputSlots);
+equalSetBuilder.removeNotContain(outputSlots);
+fdDgBuilder.removeNotContain(outputSlots);
 }
 
 public void replace(Map<Slot, Slot> replaceMap) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java
index be7b0853605..e637af8982c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDeps.java
@@ -33,7 +33,7 @@ import java.util.stream.Collectors;
  * Function dependence items.
  */
 public class FuncDeps {
-class FuncDepsItem {
+static class FuncDepsItem {
 final Set<Slot> determinants;
 final Set<Slot> dependencies;
 
@@ -165,6 +165,10 @@ public class FuncDeps {
 && items.contains(new FuncDepsItem(dependency, dominate));
 }
 
+public Set<FuncDepsItem> getItems() {
+return items;
+}
+
 /**
  * find the determinants of dependencies
  */
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDepsDG.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDepsDG.java
index 1245762ee29..a6637f09768 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDepsDG.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/FuncDepsDG.java
@@ -150,6 +150,53 @@ public class FuncDepsDG {
 return new FuncDepsDG(ImmutableMap.copyOf(itemMap), 
ImmutableList.copyOf(dgItems));
 }
 
+public void removeNotContain(Set<Slot> validSlot) {
+FuncDeps funcDeps = findValidFuncDeps(validSlot);
+dgItems.clear();
+itemMap.clear();
+for (FuncDeps.FuncDepsItem item : funcDeps.getItems()) {
+this.addDeps(item.determinants, item.dependencies);
+}
+}
+
+/**
+ * Finds all functional dependencies that are applicable to a given 
set of valid slots.
+ */
+public FuncDeps findValidFuncDeps(Set<Slot> validSlot) {
+FuncDeps res = new FuncDeps();
+for (Entry<Set<Slot>, Integer> entry : itemMap.entrySet()) {
+if (validSlot.containsAll(entry.getKey())) {
+Set<DGItem> visited = new HashSet<>();
+Set<DGItem> children = new HashSet<>();
+DGItem dgItem = dgItems.get(entry.getValue());
+visited.add(dgItem);
+collectAllChildren(validSlot, dgItem, visited, children);
+for (DGItem child : children) {
+res.addFuncItems(dgItem.slots, child.slots);
+}
+}
+}
+return res;
+}
+
+/**
+ * Helper method to recursively collect all child nodes of a given 
root node
+ * that are valid according to the specified slots.
+ */
+  

(doris) branch branch-2.1 updated: [test](migrate) move test_hive_text_complex_type from p2 to p0 (#37007) (#37123)

2024-07-02 Thread ashingau
This is an automated email from the ASF dual-hosted git repository.

ashingau pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new cf86eb86473 [test](migrate) move test_hive_text_complex_type from p2 
to p0 (#37007) (#37123)
cf86eb86473 is described below

commit cf86eb864731e1e7d28e427ae4ddf6c14c2f0ed8
Author: Ashin Gau 
AuthorDate: Tue Jul 2 17:36:37 2024 +0800

[test](migrate) move test_hive_text_complex_type from p2 to p0 (#37007) 
(#37123)

bp: #37007
---
 .../hive_text_complex_type/create_table.hql|  27 +
 .../hive_text_complex_type/data.tar.gz | Bin 0 -> 560 bytes
 .../multi_catalog/hive_text_complex_type/run.sh|  12 
 .../hive_text_complex_type2/create_table.hql   |  21 +++
 .../hive_text_complex_type2/data.tar.gz| Bin 0 -> 366 bytes
 .../multi_catalog/hive_text_complex_type2/run.sh   |  12 
 .../hive_text_complex_type3/create_table.hql   |  24 
 .../hive_text_complex_type3/data.tar.gz| Bin 0 -> 977 bytes
 .../multi_catalog/hive_text_complex_type3/run.sh   |  12 
 .../create_table.hql   |  33 +++
 .../hive_text_complex_type_delimiter/data.tar.gz   | Bin 0 -> 568 bytes
 .../hive_text_complex_type_delimiter/run.sh|  12 
 .../create_table.hql   |  27 +
 .../hive_text_complex_type_delimiter2/data.tar.gz  | Bin 0 -> 376 bytes
 .../hive_text_complex_type_delimiter2/run.sh   |  12 
 .../create_table.hql   |  26 +
 .../hive_text_complex_type_delimiter3/data.tar.gz  | Bin 0 -> 978 bytes
 .../hive_text_complex_type_delimiter3/run.sh   |  12 
 .../parquet_predicate_table/create_table.hql   |  18 ++
 .../parquet_predicate_table/data.tar.gz| Bin 0 -> 828 bytes
 .../multi_catalog/parquet_predicate_table/run.sh   |  12 
 .../hive/test_hive_text_complex_type.out   |  65 +
 .../hive/test_hive_text_complex_type.groovy|  15 +++--
 23 files changed, 334 insertions(+), 6 deletions(-)

diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/create_table.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/create_table.hql
new file mode 100644
index 000..3b20db98019
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/create_table.hql
@@ -0,0 +1,27 @@
+CREATE DATABASE IF NOT EXISTS multi_catalog;
+USE multi_catalog;
+
+CREATE TABLE `multi_catalog.hive_text_complex_type`(
+  `column1` int, 
+  `column2` map, 
+  `column3` map, 
+  `column4` map, 
+  `column5` map, 
+  `column6` map, 
+  `column7` map, 
+  `column8` map, 
+  `column9` map, 
+  `column10` map, 
+  `column11` map, 
+  `column12` 
struct)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION '/user/doris/suites/multi_catalog/hive_text_complex_type'
+TBLPROPERTIES (
+  'transient_lastDdlTime'='1690518015');
+
+msck repair table hive_text_complex_type;
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/data.tar.gz
 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/data.tar.gz
new file mode 100644
index 000..dd8a3c6b068
Binary files /dev/null and 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/data.tar.gz
 differ
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/run.sh
 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/run.sh
new file mode 100644
index 000..f3136eaa200
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type/run.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -x
+
+CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+
+## mkdir and put data to hdfs
+cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
+hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
+hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
+
+# create table
+hive -f "${CUR_DIR}/create_table.hql"
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type2/create_table.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type2/create_table.hql
new file mode 100644
index 000..ac75375d950
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_text_complex_type2/create_table.h

(doris) branch branch-2.1 updated: [branch-2.1] avoid glog coredump when running with ASAN (#37134)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new f5d0cdeeb45 [branch-2.1] avoid glog coredump when running with ASAN 
(#37134)
f5d0cdeeb45 is described below

commit f5d0cdeeb45344386d90cfe51692b302124de6fb
Author: Mingyu Chen 
AuthorDate: Tue Jul 2 17:45:04 2024 +0800

[branch-2.1] avoid glog coredump when running with ASAN (#37134)

## Proposed changes

This is just a workaround to try to avoid coredumps like this:
```
#0 0x56414f0e8ed1 in __asan::CheckUnwind() crtstuff.c
#1 0x56414f1009a2 in __sanitizer::CheckFailed(char const*, int, char 
const*, unsigned long long, unsigned long long) crtstuff.c
#2 0x56414f0ecbf3 in 
__asan::AsanThread::GetStackFrameAccessByAddr(unsigned long, 
__asan::AsanThread::StackFrameAccess*) crtstuff.c
#3 0x56414f050d87 in 
__asan::AddressDescription::AddressDescription(unsigned long, unsigned long, 
bool) crtstuff.c
#4 0x56414f052a73 in __asan::ErrorGeneric::ErrorGeneric(unsigned int, 
unsigned long, unsigned long, unsigned long, unsigned long, bool, unsigned 
long) crtstuff.c
#5 0x56414f0e6a9e in __asan::ReportGenericError(unsigned long, unsigned 
long, unsigned long, unsigned long, bool, unsigned long, unsigned int, bool) 
crtstuff.c
#6 0x56414f066885 in gmtime_r 
(/mnt/hdd01/ci/branch21-deploy/be/lib/doris_be+0x17ef3885) (BuildId: 
f58eb5e327529636)
#7 0x564177940521 in google::LogMessage::Init(char const*, int, int, 
void (google::LogMessage::*)()) crtstuff.c
#8 0x564151de36fc in doris::Status 
doris::ThriftRpcHelper::rpc(std::__cxx11::basic_string, std::allocator> const&, 
int, std::function&)>, int) 
/home/zcp/repo_center/doris_branch-2.1/doris/be/src/util/thrift_rpc_helper.cpp:76:13
#9 0x56417603cda7 in 
doris::vectorized::VRowDistribution::automatic_create_partition() 
/home/zcp/repo_center/doris_branch-2.1/doris/be/src/vec/sink/vrow_distribution.cpp:99:5
#10 0x56417614cffa in 
doris::vectorized::VTabletWriter::_send_new_partition_batch() 
/home/zcp/repo_center/doris_branch-2.1/doris/be/src/vec/sink/writer/vtablet_writer.cpp:1346:9

```
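
The patch repeats the `#ifndef ADDRESS_SANITIZER` choice at each log site. A hedged sketch of factoring that choice into one macro (hypothetical helper, not part of the commit; assumes glog's LOG is available in the non-ASAN build):

```
#ifndef ADDRESS_SANITIZER
#include <glog/logging.h>
// Normal build: go through glog as before.
#define RPC_WARN(msg) LOG(WARNING) << msg
#else
#include <iostream>
// ASAN build: bypass glog, whose gmtime_r call trips the sanitizer here,
// and write directly to stderr instead.
#define RPC_WARN(msg) std::cerr << msg << std::endl
#endif

// Usage at a call site:
//   RPC_WARN("client reopen failed. address=" << address << ", status=" << status);
```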
---
 be/src/util/thrift_rpc_helper.cpp | 16 
 1 file changed, 16 insertions(+)

diff --git a/be/src/util/thrift_rpc_helper.cpp 
b/be/src/util/thrift_rpc_helper.cpp
index 4410a27ee44..7904fc0b7b3 100644
--- a/be/src/util/thrift_rpc_helper.cpp
+++ b/be/src/util/thrift_rpc_helper.cpp
@@ -73,22 +73,38 @@ Status ThriftRpcHelper::rpc(const std::string& ip, const 
int32_t port,
 try {
 callback(client);
 } catch (apache::thrift::transport::TTransportException& e) {
+#ifndef ADDRESS_SANITIZER
 LOG(WARNING) << "retrying call frontend service after "
  << config::thrift_client_retry_interval_ms << " ms, 
address=" << address
  << ", reason=" << e.what();
+#else
+std::cerr << "retrying call frontend service after "
+  << config::thrift_client_retry_interval_ms << " ms, 
address=" << address
+  << ", reason=" << e.what() << std::endl;
+#endif
 std::this_thread::sleep_for(
 
std::chrono::milliseconds(config::thrift_client_retry_interval_ms));
 status = client.reopen(timeout_ms);
 if (!status.ok()) {
+#ifndef ADDRESS_SANITIZER
 LOG(WARNING) << "client reopen failed. address=" << address
  << ", status=" << status;
+#else
+std::cerr << "client reopen failed. address=" << address << ", 
status=" << status
+  << std::endl;
+#endif
 return status;
 }
 callback(client);
 }
 } catch (apache::thrift::TException& e) {
+#ifndef ADDRESS_SANITIZER
 LOG(WARNING) << "call frontend service failed, address=" << address
  << ", reason=" << e.what();
+#else
+std::cerr << "call frontend service failed, address=" << address << ", 
reason=" << e.what()
+  << std::endl;
+#endif
 std::this_thread::sleep_for(
 
std::chrono::milliseconds(config::thrift_client_retry_interval_ms * 2));
 // just reopen to disable this connection


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch branch-2.1 updated: [fix](compile) fix compile failed on MacOS due to ambiguous std::abs (#37136)

2024-07-02 Thread morrysnow
This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new 239bc1a7e0d [fix](compile) fix compile failed on MacOS due to 
ambiguous std::abs (#37136)
239bc1a7e0d is described below

commit 239bc1a7e0d8f07ed3cd00f2eea7da143185c40f
Author: camby 
AuthorDate: Tue Jul 2 17:45:33 2024 +0800

[fix](compile) fix compile failed on MacOS due to ambiguous std::abs 
(#37136)

cherry-pick #35125 to branch-2.1

Co-authored-by: morrySnow <101034200+morrys...@users.noreply.github.com>
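
Background on the ambiguity: `<cstdlib>` declares `std::abs` for `int`, `long`, `long long` and the floating-point types, but not for `__int128`, so on libc++ (macOS) the call has no exact match and fails to compile. Casting to a type with an exact overload fixes it; a minimal sketch (assumes the quotient fits in 64 bits after the scale is divided away, as in `do_money_format`):

```
#include <cstdint>
#include <cstdlib>

using int128_t = __int128;  // GCC/Clang extension

int64_t frac_after_div(int128_t frac_value, int128_t multiplier) {
    // std::abs(frac_value / multiplier) is ambiguous: no __int128 overload.
    // The cast picks the long long overload and documents the range assumption.
    return std::abs(static_cast<int64_t>(frac_value / multiplier));
}
```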
---
 be/src/vec/functions/function_string.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/be/src/vec/functions/function_string.h 
b/be/src/vec/functions/function_string.h
index 51e1d624062..4096bcca6d3 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -3018,7 +3018,7 @@ StringRef do_money_format(FunctionContext* context, 
UInt32 scale, T int_value, T
 auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
 // do devide first to avoid overflow
 // after round frac_value will be positive by design.
-frac_value = std::abs(frac_value / multiplier) + 5;
+frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
 frac_value /= 10;
 } else if (scale < 2) {
 DCHECK(frac_value < 100);
@@ -3059,8 +3059,8 @@ StringRef do_money_format(FunctionContext* context, 
UInt32 scale, T int_value, T
 
 memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
 *(result_data + whole_decimal_str_len - 3) = '.';
-*(result_data + whole_decimal_str_len - 2) = '0' + std::abs(frac_value / 
10);
-*(result_data + whole_decimal_str_len - 1) = '0' + std::abs(frac_value % 
10);
+*(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
+*(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
 return result;
 };
 


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch branch-2.1 updated: [fix](ES Catalog)Add array types support in esquery function (#36936) (#37054)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new b63e6ac3504 [fix](ES Catalog)Add array types support in esquery 
function (#36936) (#37054)
b63e6ac3504 is described below

commit b63e6ac35049b07a6a3a74f10fd17d9241dd780b
Author: qiye 
AuthorDate: Tue Jul 2 17:48:35 2024 +0800

[fix](ES Catalog)Add array types support in esquery function (#36936) 
(#37054)

backport #36936
---
 .../expressions/functions/scalar/EsQuery.java  |  4 ++-
 gensrc/script/doris_builtins_functions.py  |  2 +-
 .../data/external_table_p0/es/test_es_query.out| 42 +++---
 .../external_table_p0/es/test_es_query.groovy  | 10 --
 4 files changed, 50 insertions(+), 8 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EsQuery.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EsQuery.java
index a5fbd339c9f..28a6988bca4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EsQuery.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EsQuery.java
@@ -25,6 +25,7 @@ import 
org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.BooleanType;
 import org.apache.doris.nereids.types.VarcharType;
+import org.apache.doris.nereids.types.coercion.AnyDataType;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
@@ -38,7 +39,8 @@ public class EsQuery extends ScalarFunction
 implements BinaryExpression, ExplicitlyCastableSignature, 
PropagateNullable {
 
 public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
-
FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, 
VarcharType.SYSTEM_DEFAULT)
+
FunctionSignature.ret(BooleanType.INSTANCE).args(AnyDataType.INSTANCE_WITHOUT_INDEX,
+VarcharType.SYSTEM_DEFAULT)
 );
 
 /**
diff --git a/gensrc/script/doris_builtins_functions.py 
b/gensrc/script/doris_builtins_functions.py
index b7912d08904..df50d392f20 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1544,7 +1544,7 @@ visible_functions = {
 [['esquery'], 'BOOLEAN', ['DATEV2', 'VARCHAR'], ''],
 [['esquery'], 'BOOLEAN', ['DATETIMEV2', 'VARCHAR'], ''],
 [['esquery'], 'BOOLEAN', ['TIMEV2', 'VARCHAR'], ''],
-[['esquery'], 'BOOLEAN', ['ARRAY', 'VARCHAR'], ''],
+[['esquery'], 'BOOLEAN', ['ARRAY', 'VARCHAR'], '', ['T']],
 [['esquery'], 'BOOLEAN', ['MAP', 'VARCHAR'], ''],
 [['esquery'], 'BOOLEAN', ['STRING', 'VARCHAR'], ''],
 [['esquery'], 'BOOLEAN', ['VARIANT', 'VARCHAR'], ''],
diff --git a/regression-test/data/external_table_p0/es/test_es_query.out 
b/regression-test/data/external_table_p0/es/test_es_query.out
index 605e2f1aa93..d751719389f 100644
--- a/regression-test/data/external_table_p0/es/test_es_query.out
+++ b/regression-test/data/external_table_p0/es/test_es_query.out
@@ -1,9 +1,9 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !sql01 --
-["2020-01-01", "2020-01-02"]   [-1, 0, 1, 2]   [0, 1, 2, 3]["d", "e", "f"] 
[128, 129, -129, -130]  ["192.168.0.1", "127.0.0.1"]string1 [1, 2, 3, 4]
2022-08-08  2022-08-08T12:10:10 text#1  ["2020-01-01", "2020-01-02"]
3.14[1, 2, 3, 4][1, 1.1, 1.2, 1.3]  [1, 2, 3, 4]["a", "b", "c"] 
["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] 2022-08-08T12:10:10 
2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4]  [1, 0, 1, 1]
[32768, 32769, -32769, -32770]  \N
+["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2]   [0, 1, 2, 3]
["d", "e", "f"] [128, 129, -129, -130]  ["192.168.0.1", "127.0.0.1"]string1 
[1, 2, 3, 4]2022-08-08  2022-08-08T12:10:10 text#1  ["2020-01-01", 
"2020-01-02"]3.14[1, 2, 3, 4][1, 1.1, 1.2, 1.3]  [1, 2, 3, 4]   
 ["a", "b", "c"] ["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] 
2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, 
-3, 4]  [1, 0, 1, 1][32768, 32769, -32769, -32770]  \N
 
 -- !sql02 --
-["2020-01-01", "2020-01-02"]   [-1, 0, 1, 2]   [0, 1, 2, 3]["d", "e", "f"] 
[128, 129, -129, -130]  ["192.168.0.1", "127.0.0.1"]string1 [1, 2, 3, 4]
2022-08-08  2022-08-08T12:10:10 text#1  ["2020-01-01", "2020-01-02"]
3.14[1, 2, 3, 4][1, 1.1, 1.2, 1.3]  [1, 2, 3, 4]["a", "b", "c"] 
["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] 2022-08-08T12:1

(doris) branch branch-2.0 updated: [fix](ES Catalog)Add array types support in esquery function (#36936) (#37056)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
 new fd754d757f2 [fix](ES Catalog)Add array types support in esquery 
function (#36936) (#37056)
fd754d757f2 is described below

commit fd754d757f20ff07d6c103f6fc8d0179183e1762
Author: qiye 
AuthorDate: Tue Jul 2 17:48:59 2024 +0800

[fix](ES Catalog)Add array types support in esquery function (#36936) 
(#37056)

backport #36936
---
 .../expressions/functions/scalar/EsQuery.java  |  4 ++-
 gensrc/script/doris_builtins_functions.py  |  2 +-
 .../data/external_table_p0/es/test_es_query.out| 42 +++---
 .../external_table_p0/es/test_es_query.groovy  | 10 --
 4 files changed, 50 insertions(+), 8 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EsQuery.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EsQuery.java
index a5fbd339c9f..28a6988bca4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EsQuery.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EsQuery.java
@@ -25,6 +25,7 @@ import 
org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.BooleanType;
 import org.apache.doris.nereids.types.VarcharType;
+import org.apache.doris.nereids.types.coercion.AnyDataType;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
@@ -38,7 +39,8 @@ public class EsQuery extends ScalarFunction
 implements BinaryExpression, ExplicitlyCastableSignature, 
PropagateNullable {
 
 public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
-
FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT, 
VarcharType.SYSTEM_DEFAULT)
+
FunctionSignature.ret(BooleanType.INSTANCE).args(AnyDataType.INSTANCE_WITHOUT_INDEX,
+VarcharType.SYSTEM_DEFAULT)
 );
 
 /**
diff --git a/gensrc/script/doris_builtins_functions.py 
b/gensrc/script/doris_builtins_functions.py
index ea09c5473c1..4e07a10dc21 100644
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -1488,7 +1488,7 @@ visible_functions = {
 [['esquery'], 'BOOLEAN', ['DATEV2', 'VARCHAR'], ''],
 [['esquery'], 'BOOLEAN', ['DATETIMEV2', 'VARCHAR'], ''],
 [['esquery'], 'BOOLEAN', ['TIMEV2', 'VARCHAR'], ''],
-[['esquery'], 'BOOLEAN', ['ARRAY', 'VARCHAR'], ''],
+[['esquery'], 'BOOLEAN', ['ARRAY', 'VARCHAR'], '', ['T']],
 [['esquery'], 'BOOLEAN', ['MAP', 'VARCHAR'], ''],
 [['esquery'], 'BOOLEAN', ['STRING', 'VARCHAR'], ''],
 [['esquery'], 'BOOLEAN', ['VARIANT', 'VARCHAR'], ''],
diff --git a/regression-test/data/external_table_p0/es/test_es_query.out 
b/regression-test/data/external_table_p0/es/test_es_query.out
index 605e2f1aa93..d751719389f 100644
--- a/regression-test/data/external_table_p0/es/test_es_query.out
+++ b/regression-test/data/external_table_p0/es/test_es_query.out
@@ -1,9 +1,9 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !sql01 --
-["2020-01-01", "2020-01-02"]   [-1, 0, 1, 2]   [0, 1, 2, 3]["d", "e", "f"] 
[128, 129, -129, -130]  ["192.168.0.1", "127.0.0.1"]string1 [1, 2, 3, 4]
2022-08-08  2022-08-08T12:10:10 text#1  ["2020-01-01", "2020-01-02"]
3.14[1, 2, 3, 4][1, 1.1, 1.2, 1.3]  [1, 2, 3, 4]["a", "b", "c"] 
["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] 2022-08-08T12:10:10 
2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, -3, 4]  [1, 0, 1, 1]
[32768, 32769, -32769, -32770]  \N
+["2020-01-01 12:00:00", "2020-01-02 13:01:01"] [-1, 0, 1, 2]   [0, 1, 2, 3]
["d", "e", "f"] [128, 129, -129, -130]  ["192.168.0.1", "127.0.0.1"]string1 
[1, 2, 3, 4]2022-08-08  2022-08-08T12:10:10 text#1  ["2020-01-01", 
"2020-01-02"]3.14[1, 2, 3, 4][1, 1.1, 1.2, 1.3]  [1, 2, 3, 4]   
 ["a", "b", "c"] ["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] 
2022-08-08T12:10:10 2022-08-08T12:10:10 2022-08-08T20:10:10 [1, -2, 
-3, 4]  [1, 0, 1, 1][32768, 32769, -32769, -32770]  \N
 
 -- !sql02 --
-["2020-01-01", "2020-01-02"]   [-1, 0, 1, 2]   [0, 1, 2, 3]["d", "e", "f"] 
[128, 129, -129, -130]  ["192.168.0.1", "127.0.0.1"]string1 [1, 2, 3, 4]
2022-08-08  2022-08-08T12:10:10 text#1  ["2020-01-01", "2020-01-02"]
3.14[1, 2, 3, 4][1, 1.1, 1.2, 1.3]  [1, 2, 3, 4]["a", "b", "c"] 
["{"name":"Andy","age":18}", "{"name":"Tim","age":28}"] 2022-08-08T12:1

(doris) branch master updated: [env](compile) compile failed caused by (#37068)

2024-07-02 Thread panxiaolei
This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new 2469f1fbd59 [env](compile)  compile failed  caused by
(#37068)
2469f1fbd59 is described below

commit 2469f1fbd59c9da5fb0c17827e4458874e60385c
Author: Mryange <59914473+mrya...@users.noreply.github.com>
AuthorDate: Tue Jul 2 18:23:24 2024 +0800

[env](compile)  compile failed  caused by(#37068)

compile failed  caused by  
---
 be/src/util/simd/vstring_function.h | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/be/src/util/simd/vstring_function.h 
b/be/src/util/simd/vstring_function.h
index 4fff59a01df..579da50d2df 100644
--- a/be/src/util/simd/vstring_function.h
+++ b/be/src/util/simd/vstring_function.h
@@ -17,7 +17,11 @@
 
 #pragma once
 
+#ifdef __AVX2__
 #include <immintrin.h>
+
+#include "gutil/macros.h"
+#endif
 #include 
 
 #include 
@@ -112,7 +116,7 @@ public:
 
 if constexpr (trim_single) {
 const auto ch = remove_str.data[0];
-#if defined(__AVX2__) || defined(__aarch64__)
+#if defined(__AVX2__)
 constexpr auto AVX2_BYTES = sizeof(__m256i);
 const auto size = end - begin;
 const auto* const avx2_begin = end - size / AVX2_BYTES * 
AVX2_BYTES;
@@ -153,7 +157,7 @@ public:
 
 if constexpr (trim_single) {
 const auto ch = remove_str.data[0];
-#if defined(__AVX2__) || defined(__aarch64__)
+#if defined(__AVX2__)
 constexpr auto AVX2_BYTES = sizeof(__m256i);
 const auto size = end - begin;
 const auto* const avx2_end = begin + size / AVX2_BYTES * 
AVX2_BYTES;
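
As a side note, a minimal self-contained sketch of the guard pattern this
commit enforces: AVX2 intrinsics and their header are compiled only under
`__AVX2__`, with a scalar fallback elsewhere. The function is illustrative
and not taken from the repository.

```
#include <cstddef>
#ifdef __AVX2__
#include <immintrin.h>
#endif

// Count occurrences of `ch` in [begin, end). The vector path is guarded the
// same way vstring_function.h now is, so aarch64 builds never reference
// <immintrin.h>.
inline std::size_t count_char(const char* begin, const char* end, char ch) {
    std::size_t n = 0;
#ifdef __AVX2__
    constexpr std::size_t AVX2_BYTES = sizeof(__m256i);
    const __m256i needle = _mm256_set1_epi8(ch);
    while (static_cast<std::size_t>(end - begin) >= AVX2_BYTES) {
        __m256i chunk =
                _mm256_loadu_si256(reinterpret_cast<const __m256i*>(begin));
        int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(chunk, needle));
        n += __builtin_popcount(static_cast<unsigned>(mask));
        begin += AVX2_BYTES;
    }
#endif
    for (; begin != end; ++begin) {
        n += (*begin == ch);  // scalar tail (and the whole loop without AVX2)
    }
    return n;
}
```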





(doris) branch test_0702 created (now f56c55d9630)

2024-07-02 Thread panxiaolei
This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a change to branch test_0702
in repository https://gitbox.apache.org/repos/asf/doris.git


  at f56c55d9630 do not cancel query when rf sync filter size meet eof

No new revisions were added by this update.





(doris) branch master updated: [improvement](statistics)Support show column partition update rows info. (#37124)

2024-07-02 Thread lijibing
This is an automated email from the ASF dual-hosted git repository.

lijibing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new 80f6583e692 [improvement](statistics)Support show column partition 
update rows info. (#37124)
80f6583e692 is described below

commit 80f6583e692bebb82c26d6c2ad125b41138fe3d4
Author: Jibing-Li <64681310+jibing...@users.noreply.github.com>
AuthorDate: Tue Jul 2 19:00:44 2024 +0800

[improvement](statistics)Support show column partition update rows info. 
(#37124)

Support showing the update rows of each column per partition. This may help
when investigating online issues.
```
mysql> show table stats part2 partition(p1, p2) (id, colint);
+------------+-------------+----------------+--------------+
| index_name | column_name | partition_name | updated_rows |
+------------+-------------+----------------+--------------+
| part2      | id          | p1             | 6            |
| part2      | id          | p2             | 6            |
| part2      | colint      | p1             | 6            |
| part2      | colint      | p2             | 6            |
+------------+-------------+----------------+--------------+
4 rows in set (0.01 sec)
```
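
Both grammar forms are accepted after this change; a short sketch reusing the
names from the example above:

```
-- table-level stats, as before
show table stats part2 partition(p1, p2);
-- new: per-column updated rows for the listed partitions
show table stats part2 partition(p1, p2) (id, colint);
```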
---
 fe/fe-core/src/main/cup/sql_parser.cup |  4 +-
 .../apache/doris/analysis/ShowTableStatsStmt.java  | 74 --
 .../suites/statistics/test_partition_stats.groovy  | 45 -
 3 files changed, 116 insertions(+), 7 deletions(-)

diff --git a/fe/fe-core/src/main/cup/sql_parser.cup 
b/fe/fe-core/src/main/cup/sql_parser.cup
index d423475267e..336252bc0b9 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -4597,9 +4597,9 @@ show_param ::=
 RESULT = new ShowSyncJobStmt(dbName);
 :}
 /* show table stats */
-| KW_TABLE opt_cached:cached KW_STATS table_name:tbl 
opt_partition_names:partitionNames
+| KW_TABLE opt_cached:cached KW_STATS table_name:tbl 
opt_partition_names:partitionNames opt_col_list:cols
 {:
-RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached);
+RESULT = new ShowTableStatsStmt(tbl, cols, partitionNames, cached);
 :}
 /* show column stats */
 | KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols 
opt_partition_names:partitionNames
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
index 9d025695c7b..5edfbe05886 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java
@@ -26,12 +26,14 @@ import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.ErrorCode;
 import org.apache.doris.common.ErrorReport;
+import org.apache.doris.common.Pair;
 import org.apache.doris.common.UserException;
 import org.apache.doris.datasource.CatalogIf;
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.ShowResultSet;
 import org.apache.doris.qe.ShowResultSetMetaData;
+import org.apache.doris.statistics.ColStatsMeta;
 import org.apache.doris.statistics.TableStatsMeta;
 
 import com.google.common.collect.ImmutableList;
@@ -42,7 +44,10 @@ import java.time.LocalDateTime;
 import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
 public class ShowTableStatsStmt extends ShowStmt {
 
@@ -65,15 +70,25 @@ public class ShowTableStatsStmt extends ShowStmt {
 .add("row_count")
 .build();
 
-private final TableName tableName;
+private static final ImmutableList<String> COLUMN_PARTITION_TITLE_NAMES =
+new ImmutableList.Builder<String>()
+.add("index_name")
+.add("column_name")
+.add("partition_name")
+.add("updated_rows")
+.build();
 
+private final TableName tableName;
+private final List<String> columnNames;
 private final PartitionNames partitionNames;
 private final boolean cached;
 
 private TableIf table;
 
-public ShowTableStatsStmt(TableName tableName, PartitionNames 
partitionNames, boolean cached) {
+public ShowTableStatsStmt(TableName tableName, List<String> columnNames,
+  PartitionNames partitionNames, boolean cached) {
 this.tableName = tableName;
+this.columnNames = columnNames;
 this.partitionNames = partitionNames;
 this.cached = cached;
 }
@@ -89,6 +104,9 @@ public class ShowTableStatsStmt extends ShowStmt {
 if (p

(doris) branch branch-2.1 updated: [pick]reset memtable flush thread num (#37092)

2024-07-02 Thread wangbo
This is an automated email from the ASF dual-hosted git repository.

wangbo pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new f5572ac732e [pick]reset memtable flush thread num (#37092)
f5572ac732e is described below

commit f5572ac732e1d40a2f2301cd270b3be854377c3f
Author: wangbo 
AuthorDate: Tue Jul 2 19:20:17 2024 +0800

[pick]reset memtable flush thread num (#37092)

## Proposed changes

pick #37028
---
 be/src/common/config.cpp |  2 +
 be/src/common/config.h   |  3 ++
 be/src/olap/delta_writer_v2.cpp  |  2 +-
 be/src/olap/storage_engine.cpp   |  1 +
 be/src/olap/storage_engine.h |  4 ++
 be/src/runtime/fragment_mgr.cpp  |  6 +--
 be/src/runtime/query_context.cpp |  6 +--
 be/src/runtime/query_context.h   |  4 +-
 be/src/runtime/workload_group/workload_group.cpp | 47 +++-
 be/src/runtime/workload_group/workload_group.h   |  2 +-
 be/src/vec/sink/writer/async_result_writer.cpp   | 27 --
 11 files changed, 55 insertions(+), 49 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 910bf69609e..563e4750165 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -671,6 +671,8 @@ DEFINE_Int32(flush_thread_num_per_store, "6");
 // number of thread for flushing memtable per store, for high priority load 
task
 DEFINE_Int32(high_priority_flush_thread_num_per_store, "6");
 
+DEFINE_Int32(wg_flush_thread_num_per_store, "6");
+
 // config for tablet meta checkpoint
 DEFINE_mInt32(tablet_meta_checkpoint_min_new_rowsets_num, "10");
 DEFINE_mInt32(tablet_meta_checkpoint_min_interval_secs, "600");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 2d0dc128a2a..21325a0f011 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -726,6 +726,9 @@ DECLARE_Int32(flush_thread_num_per_store);
 // number of thread for flushing memtable per store, for high priority load 
task
 DECLARE_Int32(high_priority_flush_thread_num_per_store);
 
+// workload group's flush thread num
+DECLARE_Int32(wg_flush_thread_num_per_store);
+
 // config for tablet meta checkpoint
 DECLARE_mInt32(tablet_meta_checkpoint_min_new_rowsets_num);
 DECLARE_mInt32(tablet_meta_checkpoint_min_interval_secs);
diff --git a/be/src/olap/delta_writer_v2.cpp b/be/src/olap/delta_writer_v2.cpp
index 5cfc260d1b5..378728f025c 100644
--- a/be/src/olap/delta_writer_v2.cpp
+++ b/be/src/olap/delta_writer_v2.cpp
@@ -128,7 +128,7 @@ Status DeltaWriterV2::init() {
 RETURN_IF_ERROR(_rowset_writer->init(context));
 ThreadPool* wg_thread_pool_ptr = nullptr;
 if (_state->get_query_ctx()) {
-wg_thread_pool_ptr = 
_state->get_query_ctx()->get_non_pipe_exec_thread_pool();
+wg_thread_pool_ptr = 
_state->get_query_ctx()->get_memtable_flush_pool();
 }
 RETURN_IF_ERROR(_memtable_writer->init(_rowset_writer, _tablet_schema, 
_partial_update_info,
wg_thread_pool_ptr,
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index b838af570a2..91c297b1960 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -181,6 +181,7 @@ Status StorageEngine::_open() {
 RETURN_NOT_OK_STATUS_WITH_WARN(_check_file_descriptor_number(), "check fd 
number failed");
 
 auto dirs = get_stores();
+_disk_num = dirs.size();
 RETURN_IF_ERROR(load_data_dirs(dirs));
 
 _memtable_flush_executor.reset(new MemTableFlushExecutor());
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index f2b5f421670..9dc18dfb276 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -224,6 +224,8 @@ public:
 
 std::set<std::string> get_broken_paths() { return _broken_paths; }
 
+int get_disk_num() { return _disk_num; }
+
 private:
 // Instance should be inited from `static open()`
 // MUST NOT be called in other circumstances.
@@ -469,6 +471,8 @@ private:
 
 std::unique_ptr _create_tablet_idx_lru_cache;
 
+int _disk_num {-1};
+
 DISALLOW_COPY_AND_ASSIGN(StorageEngine);
 };
 
diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp
index 08de61f8931..bd5308aeba1 100644
--- a/be/src/runtime/fragment_mgr.cpp
+++ b/be/src/runtime/fragment_mgr.cpp
@@ -775,11 +775,7 @@ Status FragmentMgr::exec_plan_fragment(const 
TExecPlanFragmentParams& params,
 std::make_pair(params.params.fragment_instance_id, 
fragment_executor));
 }
 
-auto* current_thread_pool = query_ctx->get_non_pipe_exec_thread_pool();
-if (!current_thread_pool) {
-current_thread_pool = _thread_pool.get();
-}
-auto st = current_thread_pool->submit_func([this, fragment_executor, cb]() 
{
+auto st = _thread_poo

(doris) branch branch-2.1 updated: [enhancement](nereids) speedup sql cache with variable (#37090) (#37119)

2024-07-02 Thread morrysnow
This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new 42f4271e9df [enhancement](nereids) speedup sql cache with variable 
(#37090) (#37119)
42f4271e9df is described below

commit 42f4271e9dfb5c5d912e0d9335167271cdd831b5
Author: 924060929 <924060...@qq.com>
AuthorDate: Tue Jul 2 19:25:22 2024 +0800

[enhancement](nereids) speedup sql cache with variable (#37090) (#37119)

cherry pick from #37090
---
 .../doris/common/NereidsSqlCacheManager.java   | 49 +++-
 .../org/apache/doris/nereids/NereidsPlanner.java   | 79 +++
 .../org/apache/doris/nereids/SqlCacheContext.java  | 88 +++---
 .../nereids/trees/plans/ComputeResultSet.java  | 55 ++
 .../plans/physical/PhysicalEmptyRelation.java  | 39 +-
 .../plans/physical/PhysicalOneRowRelation.java | 49 +++-
 .../trees/plans/physical/PhysicalResultSink.java   | 18 -
 .../trees/plans/physical/PhysicalSqlCache.java | 11 ++-
 .../java/org/apache/doris/qe/StmtExecutor.java |  8 +-
 .../cache/parse_sql_from_sql_cache.groovy  | 30 +---
 10 files changed, 276 insertions(+), 150 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/NereidsSqlCacheManager.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/NereidsSqlCacheManager.java
index cf6280650f0..cbc3c173af6 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/common/NereidsSqlCacheManager.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/common/NereidsSqlCacheManager.java
@@ -48,6 +48,7 @@ import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.RelationId;
 import org.apache.doris.nereids.trees.plans.logical.LogicalEmptyRelation;
 import org.apache.doris.nereids.trees.plans.logical.LogicalSqlCache;
+import org.apache.doris.nereids.util.Utils;
 import org.apache.doris.proto.InternalService;
 import org.apache.doris.proto.Types.PUniqueId;
 import org.apache.doris.qe.ConnectContext;
@@ -58,6 +59,7 @@ import org.apache.doris.qe.cache.SqlCache;
 import com.github.benmanes.caffeine.cache.Cache;
 import com.github.benmanes.caffeine.cache.Caffeine;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
 import org.apache.commons.collections.CollectionUtils;
 
 import java.lang.reflect.Field;
@@ -123,16 +125,14 @@ public class NereidsSqlCacheManager {
 SqlCacheContext sqlCacheContext = sqlCacheContextOpt.get();
 UserIdentity currentUserIdentity = 
connectContext.getCurrentUserIdentity();
 String key = currentUserIdentity.toString() + ":" + sql.trim();
-if ((sqlCaches.getIfPresent(key) == null) && 
sqlCacheContext.getOrComputeCacheKeyMd5() != null
+if (sqlCaches.getIfPresent(key) == null && 
sqlCacheContext.getOrComputeCacheKeyMd5() != null
 && sqlCacheContext.getResultSetInFe().isPresent()) {
 sqlCaches.put(key, sqlCacheContext);
 }
 }
 
-/** tryAddCache */
-public void tryAddCache(
-ConnectContext connectContext, String sql,
-CacheAnalyzer analyzer, boolean currentMissParseSqlFromSqlCache) {
+/** tryAddBeCache */
+public void tryAddBeCache(ConnectContext connectContext, String sql, 
CacheAnalyzer analyzer) {
 Optional<SqlCacheContext> sqlCacheContextOpt = 
connectContext.getStatementContext().getSqlCacheContext();
 if (!sqlCacheContextOpt.isPresent()) {
 return;
@@ -143,8 +143,7 @@ public class NereidsSqlCacheManager {
 SqlCacheContext sqlCacheContext = sqlCacheContextOpt.get();
 UserIdentity currentUserIdentity = 
connectContext.getCurrentUserIdentity();
 String key = currentUserIdentity.toString() + ":" + sql.trim();
-if ((currentMissParseSqlFromSqlCache || sqlCaches.getIfPresent(key) == 
null)
-&& sqlCacheContext.getOrComputeCacheKeyMd5() != null) {
+if (sqlCaches.getIfPresent(key) == null && 
sqlCacheContext.getOrComputeCacheKeyMd5() != null) {
 SqlCache cache = (SqlCache) analyzer.getCache();
 sqlCacheContext.setSumOfPartitionNum(cache.getSumOfPartitionNum());
 sqlCacheContext.setLatestPartitionId(cache.getLatestId());
@@ -182,9 +181,6 @@ public class NereidsSqlCacheManager {
 if (viewsChanged(env, sqlCacheContext)) {
 return invalidateCache(key);
 }
-if (usedVariablesChanged(sqlCacheContext)) {
-return invalidateCache(key);
-}
 
 LogicalEmptyRelation whateverPlan = new LogicalEmptyRelation(new 
RelationId(0), ImmutableList.of());
 if (nondeterministicFunctionChanged(whateverPlan, connectContext, 
sqlCacheContext)) {
@@ -201,7 +197,10 @@ public class NereidsSqlCacheManager {
 
 try {
 Opt

(doris) branch master updated: [fix](Nereids) normalize aggregate should not push down lambda's param (#37109)

2024-07-02 Thread morrysnow
This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new cce5d5b6a21 [fix](Nereids) normalize aggregate should not push down 
lambda's param (#37109)
cce5d5b6a21 is described below

commit cce5d5b6a216afab2ac9a20d3d753174c2a8e012
Author: morrySnow <101034200+morrys...@users.noreply.github.com>
AuthorDate: Tue Jul 2 19:36:59 2024 +0800

[fix](Nereids) normalize aggregate should not push down lambda's param 
(#37109)

An ArrayItemSlot should not be collected as an input slot.
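
The query shape this fixes, taken from the regression test added below: the
lambda parameter `i` is an ArrayItemSlot and must not be pushed down as an
input slot of the aggregate.

```
select count(array_filter(i -> (i > 0.99), array(1, 2, 3)));
```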
---
 .../java/org/apache/doris/nereids/trees/expressions/Expression.java | 4 +++-
 .../normalize_aggregate/normalize_aggregate_test.out| 3 +++
 .../normalize_aggregate/normalize_aggregate_test.groovy | 6 --
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java
index f6c7cbdb66a..d7f400955c0 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java
@@ -23,6 +23,7 @@ import org.apache.doris.nereids.analyzer.Unbound;
 import org.apache.doris.nereids.analyzer.UnboundVariable;
 import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.trees.AbstractTreeNode;
+import 
org.apache.doris.nereids.trees.expressions.ArrayItemReference.ArrayItemSlot;
 import org.apache.doris.nereids.trees.expressions.functions.ExpressionTrait;
 import org.apache.doris.nereids.trees.expressions.functions.Nondeterministic;
 import 
org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
@@ -67,7 +68,8 @@ public abstract class Expression extends 
AbstractTreeNode implements
 private final boolean inferred;
 private final boolean hasUnbound;
 private final boolean compareWidthAndDepth;
-private final Supplier<Set<Slot>> inputSlots = Suppliers.memoize(() -> 
collect(Slot.class::isInstance));
+private final Supplier<Set<Slot>> inputSlots = Suppliers.memoize(
+() -> collect(e -> e instanceof Slot && !(e instanceof 
ArrayItemSlot)));
 
 protected Expression(Expression... children) {
 super(children);
diff --git 
a/regression-test/data/nereids_rules_p0/normalize_aggregate/normalize_aggregate_test.out
 
b/regression-test/data/nereids_rules_p0/normalize_aggregate/normalize_aggregate_test.out
index 860f5ff8f63..67487e816f1 100644
--- 
a/regression-test/data/nereids_rules_p0/normalize_aggregate/normalize_aggregate_test.out
+++ 
b/regression-test/data/nereids_rules_p0/normalize_aggregate/normalize_aggregate_test.out
@@ -4,3 +4,6 @@
 
 -- !test_upper_project_projections_rewrite2 --
 
+-- !test_lambda --
+1
+
diff --git 
a/regression-test/suites/nereids_rules_p0/normalize_aggregate/normalize_aggregate_test.groovy
 
b/regression-test/suites/nereids_rules_p0/normalize_aggregate/normalize_aggregate_test.groovy
index 1bf5e07c969..f7751a8bdf7 100644
--- 
a/regression-test/suites/nereids_rules_p0/normalize_aggregate/normalize_aggregate_test.groovy
+++ 
b/regression-test/suites/nereids_rules_p0/normalize_aggregate/normalize_aggregate_test.groovy
@@ -15,8 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 suite("normalize_aggregate") {
-sql "SET enable_nereids_planner=true"
-sql "SET enable_fallback_to_original_planner=false"
 qt_test_upper_project_projections_rewrite """
 SELECT DISTINCT + + ( ( + + 46 ) ) * 89 AS col0, COUNT( * ) + + - 72 + 
- - 87 - AVG ( ALL - 56 ) * COUNT( * ) + - CASE + 49 WHEN 6 * + 76 + - +
 CAST( NULL AS SIGNED ) THEN NULL WHEN - COUNT( DISTINCT + + CAST( NULL 
AS SIGNED ) ) + 23 THEN NULL ELSE - + 43 * 32 - + 97 + - ( + 65 ) * + +
@@ -29,4 +27,8 @@ suite("normalize_aggregate") {
 qt_test_upper_project_projections_rewrite2 """
 SELECT - + AVG ( DISTINCT - col0 ) * - col0 FROM
 normalize_aggregate_tab WHERE + - col0 IS NULL GROUP BY col0 HAVING NULL 
IS NULL;"""
+
+qt_test_lambda """
+select count(array_filter(i -> (i > 0.99), array(1, 2, 3)))
+"""
 }
\ No newline at end of file





(doris) branch test_0702 updated (f56c55d9630 -> 0a5606c4d20)

2024-07-02 Thread panxiaolei
This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a change to branch test_0702
in repository https://gitbox.apache.org/repos/asf/doris.git


from f56c55d9630 do not cancel query when rf sync filter size meet eof
 new f0297cd4598 update
 new 0a5606c4d20 [Chore](runtime-filter) enlarge sync filter size rpc 
timeout limit (#37103)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/common/config.cpp   |  1 +
 be/src/exprs/runtime_filter.cpp|  4 ++--
 be/src/exprs/runtime_filter.h  |  2 +-
 be/src/exprs/runtime_filter_slots.h| 11 ++-
 regression-test/suites/query_p0/join/test_join5.groovy |  1 +
 5 files changed, 15 insertions(+), 4 deletions(-)





(doris) 02/02: [Chore](runtime-filter) enlarge sync filter size rpc timeout limit (#37103)

2024-07-02 Thread panxiaolei
This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch test_0702
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 0a5606c4d20bf49d773c09e36e3c57ad75507e0e
Author: Pxl 
AuthorDate: Tue Jul 2 14:16:39 2024 +0800

[Chore](runtime-filter) enlarge sync filter size rpc timeout limit (#37103)

## Proposed changes
enlarge sync filter size rpc timeout limit

The runtime filter will fail when the RPC times out, so we need to enlarge
the limit.
```
sync filter size meet error, filter: RuntimeFilter: (id = 3, type = 
in_or_bloomfilter, need_local_merge: false, is_broadcast: false, 
build_bf_cardinality: true
```
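
The new rule, restated as a small self-contained sketch (the one-hour cap and
the millisecond conversion come from the hunk below; the helper name is made
up):

```
#include <algorithm>
#include <cstdint>

// RPC timeout for send_filter_size: follow the query's execution timeout,
// capped at one hour, instead of the much shorter runtime-filter wait time.
int64_t sync_filter_size_rpc_timeout_ms(int execution_timeout_s) {
    return static_cast<int64_t>(std::min(3600, execution_timeout_s)) * 1000;
}
// e.g. execution_timeout_s = 300  -> 300000 ms
//      execution_timeout_s = 7200 -> 3600000 ms (capped)
```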
---
 be/src/common/config.cpp   | 1 +
 be/src/exprs/runtime_filter.cpp| 4 ++--
 be/src/exprs/runtime_filter.h  | 2 +-
 be/src/exprs/runtime_filter_slots.h| 2 +-
 regression-test/suites/query_p0/join/test_join5.groovy | 1 +
 5 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 910bf69609e..8ca9b6254ab 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -246,6 +246,7 @@ DEFINE_Int32(doris_scanner_thread_pool_queue_size, 
"102400");
 // default thrift client connect timeout(in seconds)
 DEFINE_mInt32(thrift_connect_timeout_seconds, "3");
 DEFINE_mInt32(fetch_rpc_timeout_seconds, "30");
+
 // default thrift client retry interval (in milliseconds)
 DEFINE_mInt64(thrift_client_retry_interval_ms, "1000");
 // max message size of thrift request
diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp
index c84f7ad83e6..107aa7d0f99 100644
--- a/be/src/exprs/runtime_filter.cpp
+++ b/be/src/exprs/runtime_filter.cpp
@@ -1059,7 +1059,7 @@ public:
 : Base(req, callback), _dependency(std::move(dependency)), 
_filter(filter) {}
 };
 
-Status IRuntimeFilter::send_filter_size(uint64_t local_filter_size) {
+Status IRuntimeFilter::send_filter_size(RuntimeState* state, uint64_t 
local_filter_size) {
 DCHECK(is_producer());
 
 if (_need_local_merge) {
@@ -1110,7 +1110,7 @@ Status IRuntimeFilter::send_filter_size(uint64_t 
local_filter_size) {
 
 request->set_filter_size(local_filter_size);
 request->set_filter_id(_filter_id);
-callback->cntl_->set_timeout_ms(wait_time_ms());
+callback->cntl_->set_timeout_ms(std::min(3600, state->execution_timeout()) 
* 1000);
 
 stub->send_filter_size(closure->cntl_.get(), closure->request_.get(), 
closure->response_.get(),
closure.get());
diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h
index ee6897be322..e8c5bbfd872 100644
--- a/be/src/exprs/runtime_filter.h
+++ b/be/src/exprs/runtime_filter.h
@@ -230,7 +230,7 @@ public:
 // push filter to remote node or push down it to scan_node
 Status publish(bool publish_local = false);
 
-Status send_filter_size(uint64_t local_filter_size);
+Status send_filter_size(RuntimeState* state, uint64_t local_filter_size);
 
 RuntimeFilterType type() const { return _runtime_filter_type; }
 
diff --git a/be/src/exprs/runtime_filter_slots.h 
b/be/src/exprs/runtime_filter_slots.h
index ac85a02bed4..ebda4b56fcc 100644
--- a/be/src/exprs/runtime_filter_slots.h
+++ b/be/src/exprs/runtime_filter_slots.h
@@ -55,7 +55,7 @@ public:
 // send_filter_size may call dependency->sub(), so we call 
set_dependency firstly for all rf to avoid dependency set_ready repeatedly
 for (auto* runtime_filter : _runtime_filters) {
 if (runtime_filter->need_sync_filter_size()) {
-
RETURN_IF_ERROR(runtime_filter->send_filter_size(hash_table_size));
+RETURN_IF_ERROR(runtime_filter->send_filter_size(state, 
hash_table_size));
 }
 }
 return Status::OK();
diff --git a/regression-test/suites/query_p0/join/test_join5.groovy 
b/regression-test/suites/query_p0/join/test_join5.groovy
index 62be496372d..4323575870f 100644
--- a/regression-test/suites/query_p0/join/test_join5.groovy
+++ b/regression-test/suites/query_p0/join/test_join5.groovy
@@ -16,6 +16,7 @@
 // under the License.
 
 suite("test_join5", "query,p0") {
+sql "set runtime_filter_wait_time_ms = 5"
 def DBname = "regression_test_join5"
 sql "DROP DATABASE IF EXISTS ${DBname}"
 sql "CREATE DATABASE IF NOT EXISTS ${DBname}"





(doris) 01/02: update

2024-07-02 Thread panxiaolei
This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch test_0702
in repository https://gitbox.apache.org/repos/asf/doris.git

commit f0297cd459856f6a11bfb8456cd5719a98207d63
Author: BiteThet 
AuthorDate: Tue Jul 2 19:45:42 2024 +0800

update
---
 be/src/exprs/runtime_filter_slots.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/be/src/exprs/runtime_filter_slots.h 
b/be/src/exprs/runtime_filter_slots.h
index b5b04a1ebac..ac85a02bed4 100644
--- a/be/src/exprs/runtime_filter_slots.h
+++ b/be/src/exprs/runtime_filter_slots.h
@@ -71,6 +71,9 @@ public:
 // process ignore duplicate IN_FILTER
 std::unordered_set<int> has_in_filter;
 for (auto* filter : _runtime_filters) {
+if (filter->get_ignored()) {
+continue;
+}
 if (filter->get_real_type() != RuntimeFilterType::IN_FILTER) {
 continue;
 }
@@ -83,6 +86,9 @@ public:
 
 // process ignore filter when it has IN_FILTER on same expr, and init 
bloom filter size
 for (auto* filter : _runtime_filters) {
+if (filter->get_ignored()) {
+continue;
+}
 if (filter->get_real_type() == RuntimeFilterType::IN_FILTER ||
 !has_in_filter.contains(filter->expr_order())) {
 continue;
@@ -95,6 +101,9 @@ public:
 Status init_filters(RuntimeState* state, uint64_t local_hash_table_size) {
 // process IN_OR_BLOOM_FILTER's real type
 for (auto* filter : _runtime_filters) {
+if (filter->get_ignored()) {
+continue;
+}
 if (filter->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER &&
 get_real_size(filter, local_hash_table_size) > 
state->runtime_filter_max_in_num()) {
 RETURN_IF_ERROR(filter->change_to_bloom_filter());





(doris) branch master updated: [refactor](nereids) refactor analyze view (#37106)

2024-07-02 Thread huajianlan
This is an automated email from the ASF dual-hosted git repository.

huajianlan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new 5858beea761 [refactor](nereids) refactor analyze view (#37106)
5858beea761 is described below

commit 5858beea761b55380c81b2f39fa0ba8868fdeeaf
Author: 924060929 <924060...@qq.com>
AuthorDate: Tue Jul 2 19:47:03 2024 +0800

[refactor](nereids) refactor analyze view (#37106)

The Analyzer of NereidsPlanner used different rules to analyze a normal plan
and a view, to prevent the plans inside views from being analyzed multiple
times, because some rules cannot be applied more than once: decimal type
coercion, for example, generates a wrong result if it runs twice.

But this design is tricky. Normally, after processing a LogicalView, the
whole plan tree inside it should contain no unbound plans, but currently
that is not the case. This problem blocks the development of some rules,
so I refactor it:
1. the Analyzer no longer traverses the children of a LogicalView (see the
sketch below)
2. after the LogicalView is linked into the outer plan tree, its whole plan
tree contains no unbound plans
3. views and tables are analyzed with the same rules, keeping it simple
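
A minimal re-implementation of the traversal guard, assuming only what the
diff below shows (a ThreadLocal set around a Supplier and cleared afterwards);
the class and method bodies are simplified sketches, not repository code.

```
import java.util.Set;
import java.util.function.Supplier;

final class TraversalGuard {
    // Plan classes whose children rewrite jobs must not descend into,
    // scoped to the current thread.
    private static final ThreadLocal<Set<Class<?>>> NOT_TRAVERSE = new ThreadLocal<>();

    static <T> T notTraverseChildrenOf(Set<Class<?>> classes, Supplier<T> action) {
        try {
            NOT_TRAVERSE.set(classes);
            return action.get();
        } finally {
            NOT_TRAVERSE.remove();
        }
    }

    // Rewrite jobs consult this before visiting a plan's children.
    static boolean shouldTraverse(Object plan) {
        Set<Class<?>> skip = NOT_TRAVERSE.get();
        return skip == null || skip.stream().noneMatch(c -> c.isInstance(plan));
    }
}
```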
---
 .../org/apache/doris/nereids/CascadesContext.java  |  12 +-
 .../jobs/executor/AbstractBatchJobExecutor.java|  40 +-
 .../doris/nereids/jobs/executor/Analyzer.java  |  80 +--
 .../doris/nereids/jobs/executor/Rewriter.java  | 663 +++--
 .../jobs/rewrite/PlanTreeRewriteBottomUpJob.java   |  22 +-
 .../nereids/jobs/rewrite/PlanTreeRewriteJob.java   |   6 +-
 .../jobs/rewrite/PlanTreeRewriteTopDownJob.java|  22 +-
 .../jobs/rewrite/RootPlanTreeRewriteJob.java   |  13 +-
 .../doris/nereids/rules/analysis/BindRelation.java |   2 +-
 9 files changed, 454 insertions(+), 406 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java
index 3b9ba912383..a0d748c08c7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java
@@ -240,19 +240,11 @@ public class CascadesContext implements ScheduleContext {
 }
 
 public Analyzer newAnalyzer() {
-return newAnalyzer(false);
-}
-
-public Analyzer newAnalyzer(boolean analyzeView) {
-return new Analyzer(this, analyzeView);
-}
-
-public Analyzer newAnalyzer(boolean analyzeView, 
Optional<CustomTableResolver> customTableResolver) {
-return new Analyzer(this, analyzeView, customTableResolver);
+return newAnalyzer(Optional.empty());
 }
 
 public Analyzer newAnalyzer(Optional<CustomTableResolver> 
customTableResolver) {
-return newAnalyzer(false, customTableResolver);
+return new Analyzer(this, customTableResolver);
 }
 
 @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/AbstractBatchJobExecutor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/AbstractBatchJobExecutor.java
index bec86debc9e..4eebf6ffc05 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/AbstractBatchJobExecutor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/AbstractBatchJobExecutor.java
@@ -29,6 +29,7 @@ import org.apache.doris.nereids.jobs.rewrite.TopicRewriteJob;
 import org.apache.doris.nereids.rules.Rule;
 import org.apache.doris.nereids.rules.RuleFactory;
 import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter;
 
 import com.google.common.collect.ImmutableList;
@@ -36,6 +37,8 @@ import com.google.common.collect.ImmutableList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Objects;
+import java.util.Set;
+import java.util.function.Predicate;
 import java.util.function.Supplier;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
@@ -46,6 +49,8 @@ import java.util.stream.Stream;
  * Each batch of rules will be uniformly executed.
  */
 public abstract class AbstractBatchJobExecutor {
+private static final ThreadLocal<Set<Class<Plan>>> NOT_TRAVERSE_CHILDREN = 
new ThreadLocal<>();
+private static final Predicate<Plan> TRAVERSE_ALL_PLANS = plan -> true;
 
 protected CascadesContext cascadesContext;
 
@@ -65,6 +70,17 @@ public abstract class AbstractBatchJobExecutor {
 ).collect(ImmutableList.toImmutableList());
 }
 
+/** notTraverseChildrenOf */
+public static <T> T notTraverseChildrenOf(
+Set<Class<? extends Plan>> notTraverseClasses, Supplier<T> action) 
{
+try {
+NOT_TRAVERSE_CHILDREN.set((Set) notTraverseClasses);
+return action.get();
+} finally {
+NOT_TRAVERSE_CH

(doris) branch branch-2.1 updated (42f4271e9df -> c7549acf29e)

2024-07-02 Thread morrysnow
This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a change to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


from 42f4271e9df [enhancement](nereids) speedup sql cache with variable 
(#37090) (#37119)
 add c7549acf29e [Fix](Nereids) fix leading with different be instance 
number (#36613) (#36967)

No new revisions were added by this update.

Summary of changes:
 .../data/nereids_p0/hint/fix_leading.out   | 63 +-
 .../suites/nereids_p0/hint/fix_leading.groovy  |  2 +-
 2 files changed, 26 insertions(+), 39 deletions(-)





Error while running notifications feature from refs/heads/master:.asf.yaml in doris-website!

2024-07-02 Thread Apache Infrastructure


An error occurred while running notifications feature in .asf.yaml!:
Invalid notification target 'comm...@foo.apache.org'. Must be a valid 
@doris.apache.org list!








(doris-website) branch master updated: [doc][kerberos] add kerberos doc for hive catalog (#800)

2024-07-02 Thread luzhijing
This is an automated email from the ASF dual-hosted git repository.

luzhijing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git


The following commit(s) were added to refs/heads/master by this push:
 new e39ed14abc [doc][kerberos] add kerberos doc for hive catalog (#800)
e39ed14abc is described below

commit e39ed14abcd9b8e74ad308bf320a24faf3006d78
Author: slothever <18522955+w...@users.noreply.github.com>
AuthorDate: Tue Jul 2 20:08:01 2024 +0800

[doc][kerberos] add kerberos doc for hive catalog (#800)

add doc for https://github.com/apache/doris/pull/36430

-

Co-authored-by: Luzhijing <82810928+luzhij...@users.noreply.github.com>
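
For orientation, a hedged sketch of the kind of kerberized Hive catalog the
new doc describes; every value below is a placeholder and the property names
should be checked against the doc itself.

```
CREATE CATALOG hive_krb PROPERTIES (
    'type' = 'hms',
    'hive.metastore.uris' = 'thrift://172.0.0.1:9083',
    'hive.metastore.sasl.enabled' = 'true',
    'hadoop.security.authentication' = 'kerberos',
    'hadoop.kerberos.principal' = 'doris/_HOST@EXAMPLE.COM',
    'hadoop.kerberos.keytab' = '/etc/doris/doris.keytab'
);
```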
---
 docs/lakehouse/datalake-analytics/hive.md  | 132 +++---
 .../current/lakehouse/datalake-analytics/hive.md   | 201 +---
 .../lakehouse/datalake-analytics/hive.md   | 203 ++---
 .../lakehouse/datalake-analytics/hive.md   | 132 +++---
 4 files changed, 485 insertions(+), 183 deletions(-)

diff --git a/docs/lakehouse/datalake-analytics/hive.md 
b/docs/lakehouse/datalake-analytics/hive.md
index ab0830d830..00b276b7a9 100644
--- a/docs/lakehouse/datalake-analytics/hive.md
+++ b/docs/lakehouse/datalake-analytics/hive.md
@@ -89,7 +89,7 @@ CREATE CATALOG hive PROPERTIES (
 
 ViewFs related parameters can be added to the catalog configuration as above, 
or added to `conf/core-site.xml`.
 
-How ViewFs works and parameter configuration, please refer to relevant hadoop 
documents, for example, 
https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/ViewFs.html
+How ViewFs works and parameter configuration, please refer to relevant hadoop 
documents, for example, 

 
 ### Hive On JuiceFS
 
@@ -285,8 +285,8 @@ Currently, Doris only supports automatic update of metadata 
in Hive Metastore (H
 
 The automatic update feature involves the following parameters in fe.conf:
 
-1. `enable_hms_events_incremental_sync`: This specifies whether to enable 
automatic incremental synchronization for metadata, which is disabled by 
default. 
-2. `hms_events_polling_interval_ms`: This specifies the interval between two 
readings, which is set to 1 by default. (Unit: millisecond) 
+1. `enable_hms_events_incremental_sync`: This specifies whether to enable 
automatic incremental synchronization for metadata, which is disabled by 
default.
+2. `hms_events_polling_interval_ms`: This specifies the interval between two 
readings, which is set to 1 by default. (Unit: millisecond)
 3. `hms_events_batch_size_per_rpc`: This specifies the maximum number of 
events that are read at a time, which is set to 500 by default.
 
 To enable automatic update(Excluding Huawei MRS), you need to modify the 
hive-site.xml of HMS and then restart HMS and HiveServer2:
@@ -344,7 +344,7 @@ If you meet error message like `Invalid method name: 
'get_table_req'`, which mea
 
 You can specify the hive version when creating the Catalog. If accessing Hive 
1.1.0 version:
 
-```sql 
+```sql
 CREATE CATALOG hive PROPERTIES (
 'type'='hms',
 'hive.metastore.uris' = 'thrift://172.0.0.1:9083',
@@ -389,7 +389,6 @@ Add following setting when creating an HMS catalog, file 
splitting and scanning
 "broker.name" = "test_broker"
 ```
 
-
 Doris has implemented Broker query support for HMS Catalog Iceberg based on 
the Iceberg `FileIO` interface. If needed, the following configuration can be 
added when creating the HMS Catalog.
 
 ```sql
@@ -412,14 +411,14 @@ To connect to the Hive Metastore with Ranger permission 
verification enabled, yo
 
 1. When creating a Catalog, add:
 
-   ```sql
-   "access_controller.properties.ranger.service.name" = "hive",
-   "access_controller.class" = 
"org.apache.doris.catalog.authorizer.ranger.hive.RangerHiveAccessControllerFactory",
-   ```
+ ```sql
+ "access_controller.properties.ranger.service.name" = "hive",
+ "access_controller.class" = 
"org.apache.doris.catalog.authorizer.ranger.hive.RangerHiveAccessControllerFactory",
+ ```
 
-   > Note:
-   >
-   > `access_controller.properties.ranger.service.name` refers to the type 
of service, such as `hive`, `hdfs`, etc. It is not the value of 
`ranger.plugin.hive.service.name` in the configuration file.
+ > Note:
+ >
+ > `access_controller.properties.ranger.service.name` refers to the type of 
service, such as `hive`, `hdfs`, etc. It is not the value of 
`ranger.plugin.hive.service.name` in the configuration file.
 
 2. Configure all FE environments:
 
@@ -502,32 +501,32 @@ This section mainly introduces how to connect to a Hive + 
HDFS cluster with Kerb
 
 ### Environment preparation
 
-- `krb5.conf`
+* `krb5.conf`
 
-   `krb5.conf` is the configuration file for the Kerberos authentication 
protocol. This file needs to be deployed on all FE and BE nodes. And ens

Error while running notifications feature from refs/heads/master:.asf.yaml in doris-website!

2024-07-02 Thread Apache Infrastructure


An error occurred while running notifications feature in .asf.yaml!:
Invalid notification target 'comm...@foo.apache.org'. Must be a valid 
@doris.apache.org list!





(doris-website) branch master updated: [fix](docs) Fix the parameter error for partial column updates in the Flink connector (#815)

2024-07-02 Thread luzhijing
This is an automated email from the ASF dual-hosted git repository.

luzhijing pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git


The following commit(s) were added to refs/heads/master by this push:
 new 9b10778328 [fix](docs) Fix the parameter error for partial column 
updates in the Flink connector (#815)
9b10778328 is described below

commit 9b107783289dd98f5a6cd897287521ec098e6a26
Author: Petrichor <31833513+vinle...@users.noreply.github.com>
AuthorDate: Tue Jul 2 20:10:34 2024 +0800

[fix](docs) Fix the parameter error for partial column updates in the Flink 
connector (#815)
---
 docs/ecosystem/flink-doris-connector.md | 2 +-
 .../current/ecosystem/flink-doris-connector.md  | 2 +-
 .../version-2.1/ecosystem/flink-doris-connector.md  | 2 +-
 versioned_docs/version-2.1/ecosystem/flink-doris-connector.md   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/ecosystem/flink-doris-connector.md 
b/docs/ecosystem/flink-doris-connector.md
index 02abce7561..cc2957eae2 100644
--- a/docs/ecosystem/flink-doris-connector.md
+++ b/docs/ecosystem/flink-doris-connector.md
@@ -494,7 +494,7 @@ WITH (
   'sink.properties.format' = 'json',
   'sink.properties.read_json_by_line' = 'true',
   'sink.properties.columns' = 'id,name,bank,age',
-  'sink.properties.partial.columns' = 'true' --Enable partial column updates
+  'sink.properties.partial_columns' = 'true' --Enable partial column updates
 );
 
 
diff --git 
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/ecosystem/flink-doris-connector.md
 
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/ecosystem/flink-doris-connector.md
index aebb3227c7..39f631eb36 100644
--- 
a/i18n/zh-CN/docusaurus-plugin-content-docs/current/ecosystem/flink-doris-connector.md
+++ 
b/i18n/zh-CN/docusaurus-plugin-content-docs/current/ecosystem/flink-doris-connector.md
@@ -501,7 +501,7 @@ WITH (
   'sink.properties.format' = 'json',
   'sink.properties.read_json_by_line' = 'true',
   'sink.properties.columns' = 'id,name,bank,age',
-  'sink.properties.partial.columns' = 'true' -- 开启部分列更新
+  'sink.properties.partial_columns' = 'true' -- 开启部分列更新
 );
 
 
diff --git 
a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/ecosystem/flink-doris-connector.md
 
b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/ecosystem/flink-doris-connector.md
index 616c5a0ef4..2f78290240 100644
--- 
a/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/ecosystem/flink-doris-connector.md
+++ 
b/i18n/zh-CN/docusaurus-plugin-content-docs/version-2.1/ecosystem/flink-doris-connector.md
@@ -501,7 +501,7 @@ WITH (
   'sink.properties.format' = 'json',
   'sink.properties.read_json_by_line' = 'true',
   'sink.properties.columns' = 'id,name,bank,age',
-  'sink.properties.partial.columns' = 'true' -- 开启部分列更新
+  'sink.properties.partial_columns' = 'true' -- 开启部分列更新
 );
 
 
diff --git a/versioned_docs/version-2.1/ecosystem/flink-doris-connector.md 
b/versioned_docs/version-2.1/ecosystem/flink-doris-connector.md
index 796a98fe37..f453b4d87b 100644
--- a/versioned_docs/version-2.1/ecosystem/flink-doris-connector.md
+++ b/versioned_docs/version-2.1/ecosystem/flink-doris-connector.md
@@ -490,7 +490,7 @@ WITH (
   'sink.properties.format' = 'json',
   'sink.properties.read_json_by_line' = 'true',
   'sink.properties.columns' = 'id,name,bank,age',
-  'sink.properties.partial.columns' = 'true' --Enable partial column updates
+  'sink.properties.partial_columns' = 'true' --Enable partial column updates
 );
 
 





(doris) branch master updated: [fix](txn insert) Forbid delete condition in txn insert (#37135)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new ec264b8f78d [fix](txn insert) Forbid delete condition in txn insert 
(#37135)
ec264b8f78d is described below

commit ec264b8f78df742067ed178b927ac5b258ff0f33
Author: meiyi 
AuthorDate: Tue Jul 2 20:48:20 2024 +0800

[fix](txn insert) Forbid delete condition in txn insert (#37135)

## Proposed changes

Currently, there are two kinds of delete in Doris: `DELETE ... WHERE`, which
is implemented via a fast path, and `DELETE ... USING`, which is implemented
much like `INSERT INTO SELECT`.

The first one can read writes made in the same txn while the second one
cannot. This is confusing for users, so for now we simply disable delete
after insert and update.
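
The now-rejected sequence as a sketch (table, columns, and values are
hypothetical): a fast-path delete on a table the same transaction has
already written to.

```
begin;
insert into t values (1, 'a');
delete from t where id = 1;  -- rejected after this fix: the fast-path
                             -- delete cannot follow an insert on the same
                             -- table within one transaction
commit;
```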
---
 .../java/org/apache/doris/load/TxnDeleteJob.java   |  2 +-
 .../commands/insert/OlapTxnInsertExecutor.java |  2 +-
 .../apache/doris/transaction/TransactionEntry.java | 19 --
 regression-test/data/insert_p0/txn_insert.out  | 22 
 .../insert_p0/txn_insert_with_schema_change.out| 42 --
 regression-test/suites/insert_p0/txn_insert.groovy | 36 ++-
 .../insert_p0/txn_insert_with_schema_change.groovy |  4 +--
 .../txn_insert_concurrent_insert_ud.groovy |  6 ++--
 .../txn_insert_concurrent_insert_update.groovy |  6 ++--
 .../insert_p2/txn_insert_with_schema_change.groovy |  4 +--
 10 files changed, 61 insertions(+), 82 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/TxnDeleteJob.java 
b/fe/fe-core/src/main/java/org/apache/doris/load/TxnDeleteJob.java
index 5a508b910c5..916a8395487 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/load/TxnDeleteJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/load/TxnDeleteJob.java
@@ -44,7 +44,7 @@ public class TxnDeleteJob extends DeleteJob {
 @Override
 public long beginTxn() throws Exception {
 TransactionEntry txnEntry = ConnectContext.get().getTxnEntry();
-this.transactionId = txnEntry.beginTransaction(targetTbl);
+this.transactionId = txnEntry.beginTransaction(targetTbl, 
SubTransactionType.DELETE);
 this.label = txnEntry.getLabel();
 return this.transactionId;
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapTxnInsertExecutor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapTxnInsertExecutor.java
index b16930a1a2a..ebe0a318e19 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapTxnInsertExecutor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapTxnInsertExecutor.java
@@ -58,7 +58,7 @@ public class OlapTxnInsertExecutor extends OlapInsertExecutor 
{
 throw new AnalysisException("Transaction insert expect label " 
+ txnEntry.getLabel()
 + ", but got " + this.labelName);
 }
-this.txnId = txnEntry.beginTransaction(table);
+this.txnId = txnEntry.beginTransaction(table, 
SubTransactionType.INSERT);
 this.labelName = txnEntry.getLabel();
 } catch (Exception e) {
 throw new AnalysisException("begin transaction failed. " + 
e.getMessage(), e);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/TransactionEntry.java 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/TransactionEntry.java
index e9db8e3f58f..6771d9c3156 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/TransactionEntry.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/TransactionEntry.java
@@ -182,7 +182,7 @@ public class TransactionEntry {
 }
 
 // Used for insert into select, return the sub_txn_id for this insert
-public long beginTransaction(TableIf table) throws Exception {
+public long beginTransaction(TableIf table, SubTransactionType 
subTransactionType) throws Exception {
 if (isInsertValuesTxnBegan()) {
 // FIXME: support mix usage of `insert into values` and `insert 
into select`
 throw new AnalysisException(
@@ -226,6 +226,12 @@ public class TransactionEntry {
 throw new AnalysisException(
 "Transaction insert must be in the same database, 
expect db_id=" + this.database.getId());
 }
+// for delete type, make sure there is no insert for the same table
+if (subTransactionType == SubTransactionType.DELETE && 
subTransactionStates.stream()
+.anyMatch(s -> s.getTable().getId() == table.getId()
+&& s.getSubTransactionType() == 
SubTransactionType.INSERT)) {
+throw new AnalysisExceptio

(doris) branch branch-2.1 updated: [opt](catalog) add some profile for parquet reader and change meta cache config (#37040) (#37146)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new e25717458e9 [opt](catalog) add some profile for parquet reader and 
change meta cache config (#37040) (#37146)
e25717458e9 is described below

commit e25717458e97074b27d1a43c6d09b97a2d4e56df
Author: Mingyu Chen 
AuthorDate: Tue Jul 2 20:58:43 2024 +0800

[opt](catalog) add some profile for parquet reader and change meta cache 
config (#37040) (#37146)

bp #37040
---
 be/src/common/config.cpp   |  5 +
 be/src/common/config.h |  2 ++
 be/src/vec/exec/format/parquet/vparquet_reader.cpp | 23 +-
 be/src/vec/exec/format/parquet/vparquet_reader.h   |  4 
 .../main/java/org/apache/doris/common/Config.java  | 13 
 .../doris/datasource/hive/HiveMetaStoreCache.java  |  6 +++---
 6 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 563e4750165..fe811165c17 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1250,6 +1250,11 @@ DEFINE_Int64(min_row_group_size, "134217728");
 // The time out milliseconds for remote fetch schema RPC, default 60s
 DEFINE_mInt64(fetch_remote_schema_rpc_timeout_ms, "6");
 
+// If set to false, the parquet reader will not use page index to filter data.
+// This is only for debug purpose, in case sometimes the page index
+// filter wrong data.
+DEFINE_mBool(enable_parquet_page_index, "true");
+
 // clang-format off
 #ifdef BE_TEST
 // test s3
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 21325a0f011..891a8333148 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1334,6 +1334,8 @@ DECLARE_mInt64(fetch_remote_schema_rpc_timeout_ms);
 // The minimum row group size when exporting Parquet files.
 DECLARE_Int64(min_row_group_size);
 
+DECLARE_mBool(enable_parquet_page_index);
+
 #ifdef BE_TEST
 // test s3
 DECLARE_String(test_s3_resource);
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index f99786dc6e2..f3b9f2ad55c 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -148,6 +148,10 @@ void ParquetReader::_init_profile() {
 ADD_CHILD_COUNTER_WITH_LEVEL(_profile, "FileNum", TUnit::UNIT, 
parquet_profile, 1);
 _parquet_profile.page_index_filter_time =
 ADD_CHILD_TIMER_WITH_LEVEL(_profile, "PageIndexFilterTime", 
parquet_profile, 1);
+_parquet_profile.read_page_index_time =
+ADD_CHILD_TIMER_WITH_LEVEL(_profile, "PageIndexReadTime", 
parquet_profile, 1);
+_parquet_profile.parse_page_index_time =
+ADD_CHILD_TIMER_WITH_LEVEL(_profile, "PageIndexParseTime", 
parquet_profile, 1);
 _parquet_profile.row_group_filter_time =
 ADD_CHILD_TIMER_WITH_LEVEL(_profile, "RowGroupFilterTime", 
parquet_profile, 1);
 
@@ -747,25 +751,32 @@ Status ParquetReader::_process_page_index(const 
tparquet::RowGroup& row_group,
 return Status::OK();
 }
 PageIndex page_index;
-if (!_has_page_index(row_group.columns, page_index)) {
+if (!config::enable_parquet_page_index || 
!_has_page_index(row_group.columns, page_index)) {
 read_whole_row_group();
 return Status::OK();
 }
 uint8_t col_index_buff[page_index._column_index_size];
 size_t bytes_read = 0;
 Slice result(col_index_buff, page_index._column_index_size);
-RETURN_IF_ERROR(
-_file_reader->read_at(page_index._column_index_start, result, 
&bytes_read, _io_ctx));
+{
+SCOPED_RAW_TIMER(&_statistics.read_page_index_time);
+RETURN_IF_ERROR(_file_reader->read_at(page_index._column_index_start, 
result, &bytes_read,
+  _io_ctx));
+}
 _column_statistics.read_bytes += bytes_read;
 auto& schema_desc = _file_metadata->schema();
 std::vector<RowRange> skipped_row_ranges;
 uint8_t off_index_buff[page_index._offset_index_size];
 Slice res(off_index_buff, page_index._offset_index_size);
-RETURN_IF_ERROR(
-_file_reader->read_at(page_index._offset_index_start, res, 
&bytes_read, _io_ctx));
+{
+SCOPED_RAW_TIMER(&_statistics.read_page_index_time);
+RETURN_IF_ERROR(
+_file_reader->read_at(page_index._offset_index_start, res, 
&bytes_read, _io_ctx));
+}
 _column_statistics.read_bytes += bytes_read;
 // read twice: parse column index & parse offset index
 _column_statistics.meta_read_calls += 2;
+SCOPED_RAW_TIMER(&_statistics.parse_page_index_time);
 for (auto& read_col : _read_columns) {
 auto conjunct_iter = _colname_to_value_ran
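
The change itself is C++ in the BE, but the control flow it adds is easy to restate: page-index filtering is now both switchable via config and timed by two new profile counters. A sketch of that gate in Java with illustrative names (`enableParquetPageIndex` mirrors `config::enable_parquet_page_index`; none of these identifiers are the BE's own):

    // Illustrative sketch of the new control flow, not the BE implementation.
    public class PageIndexGateSketch {
        static volatile boolean enableParquetPageIndex = true; // the new config knob

        static long readPageIndexNanos = 0;   // "PageIndexReadTime" profile counter
        static long parsePageIndexNanos = 0;  // "PageIndexParseTime" profile counter

        static void processRowGroup(boolean hasPageIndex) {
            if (!enableParquetPageIndex || !hasPageIndex) {
                readWholeRowGroup(); // fall back: no page-level filtering
                return;
            }
            long t0 = System.nanoTime();
            readColumnAndOffsetIndexes(); // two remote reads, both counted
            readPageIndexNanos += System.nanoTime() - t0;

            long t1 = System.nanoTime();
            parseIndexesAndBuildSkipRanges();
            parsePageIndexNanos += System.nanoTime() - t1;
        }

        static void readWholeRowGroup() {}
        static void readColumnAndOffsetIndexes() {}
        static void parseIndexesAndBuildSkipRanges() {}
    }

Splitting the read timers from the parse timer is the point of the patch: it lets a profile distinguish slow remote I/O (e.g. S3 latency) from slow index decoding.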

(doris) branch master updated: [fix](fe) Avoid infinite dropping index in check compatibility mode (#37116)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new 6b7b63c7c0d [fix](fe) Avoid infinite dropping index in check 
compatibility mode (#37116)
6b7b63c7c0d is described below

commit 6b7b63c7c0d2ebae25722e6f29f0dd26adaffb26
Author: walter 
AuthorDate: Tue Jul 2 21:27:39 2024 +0800

[fix](fe) Avoid infinite dropping index in check compatibility mode (#37116)
---
 .../src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java | 5 +
 1 file changed, 5 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java 
b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java
index 2fe3d748602..3968f2d274f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/alter/CloudSchemaChangeJobV2.java
@@ -118,6 +118,11 @@ public class CloudSchemaChangeJobV2 extends 
SchemaChangeJobV2 {
 
 @Override
 protected void postProcessOriginIndex() {
+if (Config.enable_check_compatibility_mode) {
+LOG.info("skip drop origin indexes in checking compatibility 
mode");
+return;
+}
+
 List<Long> originIdxList = 
indexIdMap.values().stream().collect(Collectors.toList());
 dropIndex(originIdxList);
 }


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch master updated (6b7b63c7c0d -> 277de79953b)

2024-07-02 Thread dataroaring
This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


from 6b7b63c7c0d [fix](fe) Avoid infinite dropping index in check 
compatibility mode (#37116)
 add 277de79953b [Enhancement](partial update) Add partial update mix cases 
(#37113)

No new revisions were added by this update.

Summary of changes:
 .../partial_update/test_mix_partial_update.out | 365 
 .../test_mix_partial_update_load1.csv  |   4 +
 .../test_mix_partial_update_load2.csv  |   1 +
 .../test_mix_partial_update_load3.csv  |   1 +
 .../test_mix_partial_update_load4.csv  |   1 +
 .../test_mix_partial_update_load5.csv  |   1 +
 .../test_mix_partial_update_load6.csv  |   1 +
 .../test_mix_partial_update_load7.csv  |   1 +
 .../test_mix_partial_update_load_A.csv |   1 +
 .../test_mix_partial_update_load_B.csv |   1 +
 .../test_mix_partial_update_load_C.csv |   1 +
 .../test_mix_partial_update_load_D.csv |   1 +
 .../partial_update/test_mix_partial_update.groovy  | 381 +
 13 files changed, 760 insertions(+)
 create mode 100644 
regression-test/data/unique_with_mow_p0/partial_update/test_mix_partial_update.out
 create mode 100644 
regression-test/data/unique_with_mow_p0/partial_update/test_mix_partial_update_load1.csv
 create mode 100644 
regression-test/data/unique_with_mow_p0/partial_update/test_mix_partial_update_load2.csv
 create mode 100644 
regression-test/data/unique_with_mow_p0/partial_update/test_mix_partial_update_load3.csv
 create mode 100644 
regression-test/data/unique_with_mow_p0/partial_update/test_mix_partial_update_load4.csv
 create mode 100644 
regression-test/data/unique_with_mow_p0/partial_update/test_mix_partial_update_load5.csv
 create mode 100644 
regression-test/data/unique_with_mow_p0/partial_update/test_mix_partial_update_load6.csv
 create mode 100644 
regression-test/data/unique_with_mow_p0/partial_update/test_mix_partial_update_load7.csv
 create mode 100644 
regression-test/data/unique_with_mow_p0/partial_update/test_mix_partial_update_load_A.csv
 create mode 100644 
regression-test/data/unique_with_mow_p0/partial_update/test_mix_partial_update_load_B.csv
 create mode 100644 
regression-test/data/unique_with_mow_p0/partial_update/test_mix_partial_update_load_C.csv
 create mode 100644 
regression-test/data/unique_with_mow_p0/partial_update/test_mix_partial_update_load_D.csv
 create mode 100644 
regression-test/suites/unique_with_mow_p0/partial_update/test_mix_partial_update.groovy


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch master updated (277de79953b -> 6969ad0596b)

2024-07-02 Thread gavinchou
This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


from 277de79953b [Enhancement](partial update) Add partial update mix cases 
(#37113)
 add 6969ad0596b [enhancement](compaction) optimizing memory usage for 
compaction (#37099)

No new revisions were added by this update.

Summary of changes:
 be/src/cloud/cloud_base_compaction.cpp |  10 ++
 be/src/cloud/cloud_cumulative_compaction.cpp   |  13 +-
 be/src/common/config.cpp   |   6 +
 be/src/common/config.h |   6 +
 be/src/olap/base_compaction.cpp|  10 ++
 be/src/olap/base_tablet.h  |   5 +
 be/src/olap/compaction.cpp |  15 ++-
 be/src/olap/compaction.h   |   2 +
 be/src/olap/cumulative_compaction.cpp  |  15 ++-
 be/src/olap/iterators.h|  15 ++-
 be/src/olap/merger.cpp |  67 +-
 be/src/olap/merger.h   |   6 +-
 be/src/olap/rowset/rowset_meta.h   |  15 +++
 be/src/olap/rowset/segcompaction.cpp   |   2 +-
 be/src/olap/tablet_reader.h|   2 +
 be/src/vec/olap/vertical_block_reader.cpp  |  24 +++-
 be/src/vec/olap/vertical_block_reader.h|   3 +-
 be/src/vec/olap/vertical_merge_iterator.cpp|  29 +++--
 be/src/vec/olap/vertical_merge_iterator.h  |  25 +++-
 be/test/olap/base_compaction_test.cpp  |  84 +
 be/test/olap/rowid_conversion_test.cpp |   6 +-
 be/test/vec/olap/vertical_compaction_test.cpp  |  14 ++-
 .../compaction_width_array_column.groovy   | 137 +
 23 files changed, 469 insertions(+), 42 deletions(-)
 create mode 100644 be/test/olap/base_compaction_test.cpp
 create mode 100644 
regression-test/suites/compaction/compaction_width_array_column.groovy


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch master updated: [Enhancement](s3-load) Add domain connection and aksk correction check for S3 load (#36711)

2024-07-02 Thread gavinchou
This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new 95cb544b3d8 [Enhancement](s3-load) Add domain connection and aksk 
correction check for S3 load (#36711)
95cb544b3d8 is described below

commit 95cb544b3d856f1beb07b51a5704f634840b6fa3
Author: Xin Liao 
AuthorDate: Tue Jul 2 21:44:24 2024 +0800

[Enhancement](s3-load) Add domain connection and aksk correction check for 
S3 load (#36711)

Add a domain connection check and an AK/SK correctness check for S3 load
before actual execution.
---
 .../java/org/apache/doris/analysis/LoadStmt.java   |  88 +--
 .../property/constants/S3Properties.java   |   1 +
 ...t_domain_connection_and_ak_sk_correction.groovy | 161 +
 3 files changed, 241 insertions(+), 9 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java
index d8d515fe6a4..1990078b46c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java
@@ -21,10 +21,14 @@ import org.apache.doris.catalog.Database;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.cloud.proto.Cloud.ObjectStoreInfoPB;
 import org.apache.doris.cloud.security.SecurityChecker;
+import org.apache.doris.cloud.storage.RemoteBase;
+import org.apache.doris.cloud.storage.RemoteBase.ObjectInfo;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.DdlException;
+import org.apache.doris.common.InternalErrorCode;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.PrintableMap;
 import org.apache.doris.common.util.TimeUtils;
@@ -500,7 +504,7 @@ public class LoadStmt extends DdlStmt {
 }
 } else if (brokerDesc != null) {
 etlJobType = EtlJobType.BROKER;
-checkWhiteList();
+checkS3Param();
 } else if (isMysqlLoad) {
 etlJobType = EtlJobType.LOCAL_FILE;
 } else {
@@ -518,6 +522,26 @@ public class LoadStmt extends DdlStmt {
 user = ConnectContext.get().getQualifiedUser();
 }
 
+
+private String getProviderFromEndpoint() {
+Map<String, String> properties = brokerDesc.getProperties();
+for (Map.Entry<String, String> entry : properties.entrySet()) {
+if (entry.getKey().equalsIgnoreCase(S3Properties.PROVIDER)) {
+return entry.getValue();
+}
+}
+return S3Properties.S3_PROVIDER;
+}
+
+private String getBucketFromFilePath(String filePath) throws Exception {
+String[] parts = filePath.split("\\/\\/");
+if (parts.length < 2) {
+throw new Exception("filePath is not valid");
+}
+String buckt = parts[1].split("\\/")[0];
+return buckt;
+}
+
 public String getComment() {
 return comment;
 }
@@ -597,7 +621,7 @@ public class LoadStmt extends DdlStmt {
 }
 }
 
-public void checkWhiteList() throws UserException {
+public void checkS3Param() throws UserException {
 Map<String, String> brokerDescProperties = brokerDesc.getProperties();
 if (brokerDescProperties.containsKey(S3Properties.Env.ENDPOINT)
 && 
brokerDescProperties.containsKey(S3Properties.Env.ACCESS_KEY)
@@ -606,17 +630,63 @@ public class LoadStmt extends DdlStmt {
 String endpoint = 
brokerDescProperties.get(S3Properties.Env.ENDPOINT);
 endpoint = endpoint.replaceFirst("^http://";, "");
 endpoint = endpoint.replaceFirst("^https://";, "");
-List<String> whiteList = new 
ArrayList<>(Arrays.asList(Config.s3_load_endpoint_white_list));
-whiteList.removeIf(String::isEmpty);
-if (!whiteList.isEmpty() && !whiteList.contains(endpoint)) {
-throw new UserException("endpoint: " + endpoint
-+ " is not in s3 load endpoint white list: " + 
String.join(",", whiteList));
-}
 brokerDescProperties.put(S3Properties.Env.ENDPOINT, endpoint);
-if (AzureProperties.checkAzureProviderPropertyExist(properties)) {
+checkWhiteList(endpoint);
+if 
(AzureProperties.checkAzureProviderPropertyExist(brokerDescProperties)) {
 return;
 }
 checkEndpoint(endpoint);
+checkAkSk();
+}
+}
+
+public void checkWhiteList(String endpoint) throws UserException {
+List<String> whiteList = new 
ArrayList<>(Arrays.asList(Config.s3_load_endpoint_white_list));
+whiteList.removeIf(String::isEmpty);
+if (!whiteList.isEmpty() && !whiteList.contains(en
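
The diff is cut off inside the new `checkWhiteList`, but the removed hunk above shows the logic that was relocated, so the extracted method can be sketched with reasonable confidence. Here `s3LoadEndpointWhiteList` stands in for `Config.s3_load_endpoint_white_list` and the exception type is simplified:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    // Sketch of the relocated whitelist check, reconstructed from the removed
    // hunk earlier in this diff; not the verbatim Doris method.
    public class EndpointWhiteListSketch {
        static String[] s3LoadEndpointWhiteList = {"s3.example.com"};

        public static void checkWhiteList(String endpoint) {
            List<String> whiteList = new ArrayList<>(Arrays.asList(s3LoadEndpointWhiteList));
            whiteList.removeIf(String::isEmpty);
            if (!whiteList.isEmpty() && !whiteList.contains(endpoint)) {
                throw new IllegalArgumentException("endpoint: " + endpoint
                        + " is not in s3 load endpoint white list: " + String.join(",", whiteList));
            }
        }

        public static void main(String[] args) {
            checkWhiteList("s3.example.com");   // passes
            checkWhiteList("evil.example.com"); // throws
        }
    }

An empty whitelist disables the restriction entirely, which is why the `removeIf(String::isEmpty)` step matters: a config value of "" must not lock every endpoint out.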

(doris) branch branch-2.1 updated: [Fix](csv_reader) Add a session variable to control whether empty rows in CSV files are read as NULL values (#37153)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new bd24a8bdd9b [Fix](csv_reader) Add a session variable to control 
whether empty rows in CSV files are read as NULL values (#37153)
bd24a8bdd9b is described below

commit bd24a8bdd9b7e327fc2ded62bc6fc30092c46c27
Author: Tiewei Fang <43782773+bepppo...@users.noreply.github.com>
AuthorDate: Tue Jul 2 22:12:17 2024 +0800

[Fix](csv_reader) Add a session variable to control whether empty rows in 
CSV files are read as NULL values (#37153)

bp: #36668
---
 be/src/runtime/runtime_state.h |   5 +
 be/src/vec/exec/format/csv/csv_reader.cpp  |  25 -
 be/src/vec/exec/format/csv/csv_reader.h|   1 +
 .../java/org/apache/doris/qe/SessionVariable.java  |   8 ++
 gensrc/thrift/PaloInternalService.thrift   |   1 +
 .../tvf/test_read_csv_empty_line_as_null.out   |  31 ++
 .../tvf/test_read_csv_empty_line_as_null.groovy| 111 +
 7 files changed, 180 insertions(+), 2 deletions(-)

diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h
index 33b5ded9c3a..b88b29ee8d0 100644
--- a/be/src/runtime/runtime_state.h
+++ b/be/src/runtime/runtime_state.h
@@ -518,6 +518,11 @@ public:
 return _query_options.__isset.enable_parallel_scan && 
_query_options.enable_parallel_scan;
 }
 
+bool is_read_csv_empty_line_as_null() const {
+return _query_options.__isset.read_csv_empty_line_as_null &&
+   _query_options.read_csv_empty_line_as_null;
+}
+
 int parallel_scan_max_scanners_count() const {
 return _query_options.__isset.parallel_scan_max_scanners_count
? _query_options.parallel_scan_max_scanners_count
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp 
b/be/src/vec/exec/format/csv/csv_reader.cpp
index a10ba8c3d14..7894b5c57ae 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -485,7 +485,10 @@ Status CsvReader::get_next_block(Block* block, size_t* 
read_rows, bool* eof) {
 continue;
 }
 if (size == 0) {
-// Read empty row, just continue
+if (!_line_reader_eof && 
_state->is_read_csv_empty_line_as_null()) {
+++rows;
+}
+// Read empty line, continue
 continue;
 }
 
@@ -518,7 +521,10 @@ Status CsvReader::get_next_block(Block* block, size_t* 
read_rows, bool* eof) {
 continue;
 }
 if (size == 0) {
-// Read empty row, just continue
+if (!_line_reader_eof && 
_state->is_read_csv_empty_line_as_null()) {
+RETURN_IF_ERROR(_fill_empty_line(block, columns, &rows));
+}
+// Read empty line, continue
 continue;
 }
 
@@ -661,6 +667,21 @@ Status CsvReader::_fill_dest_columns(const Slice& line, 
Block* block,
 return Status::OK();
 }
 
+Status CsvReader::_fill_empty_line(Block* block, 
std::vector& columns,
+   size_t* rows) {
+for (int i = 0; i < _file_slot_descs.size(); ++i) {
+IColumn* col_ptr = columns[i];
+if (!_is_load) {
+col_ptr = const_cast<IColumn*>(
+
block->get_by_position(_file_slot_idx_map[i]).column.get());
+}
+auto& null_column = assert_cast(*col_ptr);
+null_column.insert_data(nullptr, 0);
+}
+++(*rows);
+return Status::OK();
+}
+
 Status CsvReader::_validate_line(const Slice& line, bool* success) {
 if (!_is_proto_format && !validate_utf8(line.data, line.size)) {
 if (!_is_load) {
diff --git a/be/src/vec/exec/format/csv/csv_reader.h 
b/be/src/vec/exec/format/csv/csv_reader.h
index d9c8633f427..65eba62a54c 100644
--- a/be/src/vec/exec/format/csv/csv_reader.h
+++ b/be/src/vec/exec/format/csv/csv_reader.h
@@ -204,6 +204,7 @@ private:
 Status _create_decompressor();
 Status _fill_dest_columns(const Slice& line, Block* block,
   std::vector& columns, size_t* 
rows);
+Status _fill_empty_line(Block* block, std::vector& 
columns, size_t* rows);
 Status _line_split_to_values(const Slice& line, bool* success);
 void _split_line(const Slice& line);
 Status _check_array_format(std::vector& split_values, bool* 
is_success);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index d6e75faf673..5cf6cb901d5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -194,6 +194,8 @@ public cla
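
The FE half of the diff is truncated here, but the semantics of the new session variable are fully visible in the BE hunks above: with `read_csv_empty_line_as_null=false` (the default) an empty line is skipped, with `true` it becomes an all-NULL row. A behavioral sketch of that rule, as an illustration rather than the BE reader (which is C++):

    import java.util.ArrayList;
    import java.util.List;

    // Behavioral sketch of the new knob. This illustrates the semantics only;
    // column parsing and NULL handling are simplified.
    public class EmptyLineSketch {
        static List<String[]> parse(List<String> lines, int numCols, boolean emptyLineAsNull) {
            List<String[]> rows = new ArrayList<>();
            for (String line : lines) {
                if (line.isEmpty()) {
                    if (emptyLineAsNull) {
                        rows.add(new String[numCols]); // all elements null
                    }
                    continue; // default behavior: skip the empty line
                }
                rows.add(line.split(",", -1));
            }
            return rows;
        }

        public static void main(String[] args) {
            List<String> csv = List.of("1,a", "", "2,b");
            System.out.println(parse(csv, 2, false).size()); // 2
            System.out.println(parse(csv, 2, true).size());  // 3
        }
    }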

(doris) branch branch-2.1 updated (bd24a8bdd9b -> 74086189d31)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a change to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


from bd24a8bdd9b [Fix](csv_reader) Add a session variable to control 
whether empty rows in CSV files are read as NULL values (#37153)
 add 74086189d31 [test](tvf) move p2 tvf tests from p2 to p0 (#36871) 
(#37150)

No new revisions were added by this update.

Summary of changes:
 .../scripts/data/tvf/test_hdfs_tvf_compression/run.sh | 19 +++
 .../hive/scripts/data/tvf/test_tvf/run.sh | 19 +++
 .../tvf/test_hdfs_tvf_compression.out |  0
 .../tvf/test_path_partition_keys.out  |  0
 .../tvf/test_path_partition_keys/dt1=cyw/a.csv|  0
 .../tvf/test_path_partition_keys/dt1=cyw/b.csv|  0
 .../tvf/test_path_partition_keys/dt1=hello/c.csv  |  0
 .../test_path_partition_keys/dt2=two/dt1=cyw/a.csv|  0
 .../test_path_partition_keys/dt2=two/dt1=cyw/b.csv|  0
 .../test_path_partition_keys/dt2=two/dt1=hello/c.csv  |  0
 .../tvf/test_tvf_p2.out   |  0
 .../tvf/test_hdfs_tvf_compression.groovy  | 10 +-
 .../tvf/test_path_partition_keys.groovy   | 10 +-
 .../tvf/test_tvf_p2.groovy|  8 
 14 files changed, 52 insertions(+), 14 deletions(-)
 create mode 100644 
docker/thirdparties/docker-compose/hive/scripts/data/tvf/test_hdfs_tvf_compression/run.sh
 create mode 100644 
docker/thirdparties/docker-compose/hive/scripts/data/tvf/test_tvf/run.sh
 rename regression-test/data/{external_table_p2 => 
external_table_p0}/tvf/test_hdfs_tvf_compression.out (100%)
 rename regression-test/data/{external_table_p2 => 
external_table_p0}/tvf/test_path_partition_keys.out (100%)
 rename regression-test/data/{external_table_p2 => 
external_table_p0}/tvf/test_path_partition_keys/dt1=cyw/a.csv (100%)
 rename regression-test/data/{external_table_p2 => 
external_table_p0}/tvf/test_path_partition_keys/dt1=cyw/b.csv (100%)
 rename regression-test/data/{external_table_p2 => 
external_table_p0}/tvf/test_path_partition_keys/dt1=hello/c.csv (100%)
 rename regression-test/data/{external_table_p2 => 
external_table_p0}/tvf/test_path_partition_keys/dt2=two/dt1=cyw/a.csv (100%)
 rename regression-test/data/{external_table_p2 => 
external_table_p0}/tvf/test_path_partition_keys/dt2=two/dt1=cyw/b.csv (100%)
 rename regression-test/data/{external_table_p2 => 
external_table_p0}/tvf/test_path_partition_keys/dt2=two/dt1=hello/c.csv (100%)
 rename regression-test/data/{external_table_p2 => 
external_table_p0}/tvf/test_tvf_p2.out (100%)
 rename regression-test/suites/{external_table_p2 => 
external_table_p0}/tvf/test_hdfs_tvf_compression.groovy (92%)
 rename regression-test/suites/{external_table_p2 => 
external_table_p0}/tvf/test_path_partition_keys.groovy (95%)
 rename regression-test/suites/{external_table_p2 => 
external_table_p0}/tvf/test_tvf_p2.groovy (93%)


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch branch-2.1 updated: [test](tvf) move p2 tvf tests from p2 to p0 (#37081) (#37152)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new b445c783ebd [test](tvf) move p2 tvf tests from p2 to p0 (#37081) 
(#37152)
b445c783ebd is described below

commit b445c783ebda9fea892b2da10a8727dc072cb536
Author: Tiewei Fang <43782773+bepppo...@users.noreply.github.com>
AuthorDate: Tue Jul 2 22:38:22 2024 +0800

[test](tvf) move p2 tvf tests from p2 to p0 (#37081) (#37152)

bp: #37081
---
 .../data/external_table_p0/tvf/test_tvf_view.out   | 28 ++
 .../external_table_p2/tvf/test_tvf_view_p2.out | 28 --
 .../tvf/test_tvf_view.groovy}  | 12 +-
 .../tvf/test_tvf_view_count.groovy}| 10 
 4 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/regression-test/data/external_table_p0/tvf/test_tvf_view.out 
b/regression-test/data/external_table_p0/tvf/test_tvf_view.out
new file mode 100644
index 000..ddf5113bbf1
--- /dev/null
+++ b/regression-test/data/external_table_p0/tvf/test_tvf_view.out
@@ -0,0 +1,28 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !1 --
+100490
+
+-- !2 --
+1  goldenrod lavender spring chocolate laceManufacturer#1  
Brand#13PROMO BURNISHED COPPER  7   JUMBO PKG   901.00  ly. 
slyly ironi
+2  blush thistle blue yellow saddleManufacturer#1  Brand#13
LARGE BRUSHED BRASS 1   LG CASE 902.00  lar accounts amo
+3  spring green yellow purple cornsilk Manufacturer#4  Brand#42
STANDARD POLISHED BRASS 21  WRAP CASE   903.00  egular deposits hag
+4  cornflower chocolate smoke green pink   Manufacturer#3  Brand#34
SMALL PLATED BRASS  14  MED DRUM904.00  p furiously r
+5  forest brown coral puff cream   Manufacturer#3  Brand#32
STANDARD POLISHED TIN   15  SM PKG  905.00   wake carefully 
+6  bisque cornflower lawn forest magenta   Manufacturer#2  Brand#24
PROMO PLATED STEEL  4   MED BAG 906.00  sual a
+7  moccasin green thistle khaki floral Manufacturer#1  Brand#11
SMALL PLATED COPPER 45  SM BAG  907.00  lyly. ex
+8  misty lace thistle snow royal   Manufacturer#4  Brand#44PROMO 
BURNISHED TIN 41  LG DRUM 908.00  eposi
+9  thistle dim navajo dark gainsboro   Manufacturer#4  Brand#43
SMALL BURNISHED STEEL   12  WRAP CASE   909.00  ironic foxe
+10 linen pink saddle puff powder   Manufacturer#5  Brand#54LARGE 
BURNISHED STEEL   44  LG CAN  910.01  ithely final deposit
+
+-- !3 --
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+
diff --git a/regression-test/data/external_table_p2/tvf/test_tvf_view_p2.out 
b/regression-test/data/external_table_p2/tvf/test_tvf_view_p2.out
deleted file mode 100644
index 02304fb57ab..000
--- a/regression-test/data/external_table_p2/tvf/test_tvf_view_p2.out
+++ /dev/null
@@ -1,28 +0,0 @@
--- This file is automatically generated. You should know what you did if you 
want to edit this
--- !1 --
-852910
-
--- !2 --
-199147091  plum blush violet orange bisque Manufacturer#5  Brand#51
MEDIUM ANODIZED NICKEL  28  SM DRUM 1128.14 nding, final decoy
-199147092  brown tan chocolate moccasin peru   Manufacturer#4  
Brand#44STANDARD BRUSHED COPPER 40  JUMBO PKG   1129.14 ully 
even acc
-199147093  white sandy burlywood orange powder Manufacturer#2  
Brand#23MEDIUM PLATED COPPER15  MED PACK1130.14 
furiously special
-199147094  cyan almond olive steel navajo  Manufacturer#1  Brand#15
ECONOMY BRUSHED STEEL   12  WRAP PACK   1131.14 dolites.
-199147095  linen moccasin snow deep dimManufacturer#2  Brand#22
STANDARD POLISHED TIN   37  LG CASE 1132.14  furious
-199147096  dim violet ivory cream drab Manufacturer#4  Brand#44
MEDIUM ANODIZED COPPER  20  JUMBO CAN   1133.14 ions. sometime
-199147097  steel khaki smoke beige sienna  Manufacturer#2  Brand#21
STANDARD BRUSHED BRASS  36  WRAP CASE   1134.14 und the blithely iron
-199147098  cornsilk red brown cyan moccasinManufacturer#4  
Brand#43MEDIUM ANODIZED TIN 12  SM BOX  1135.14 hely across the
-199147099  slate wheat sienna almond springManufacturer#2  
Brand#25LARGE BURNISHED TIN 1   SM CAN  1136.14 uriously ironic 
packag
-199147100  orange gainsboro chocolate ivory grey   Manufacturer#4  
Brand#45PROMO POLISHED BRASS42  MED DRUM1137.15 sual req
-
--- !3 --
-199147091
-199147092
-199147093
-199147094
-199147095
-199147096
-199147097
-199147098
-199147099
-199147100
-
diff --git 
a/regression-test/suites/extern

(doris) branch branch-2.1 updated: [test](migrate) move 2 cases from p2 to p0 for 2.1 (#37139)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new e7e1e967cfc [test](migrate) move 2 cases from p2 to p0 for 2.1 (#37139)
e7e1e967cfc is described below

commit e7e1e967cfcdbe03bd5fe20344e38b7c22d3e48d
Author: wuwenchi 
AuthorDate: Tue Jul 2 22:50:53 2024 +0800

[test](migrate) move 2 cases from p2 to p0 for 2.1 (#37139)

pick #37004
---
 .../docker-compose/hive/hive-2x.yaml.tpl   |   2 +-
 .../docker-compose/hive/hive-3x.yaml.tpl   |   2 +-
 .../hive_textfile_array_all_types/create_table.hql |  27 ++
 .../hive_textfile_array_all_types/data.tar.gz  | Bin 0 -> 625 bytes
 .../hive_textfile_array_all_types/run.sh   |  13 +
 .../hive_textfile_array_delimiter/create_table.hql |  32 ++
 .../hive_textfile_array_delimiter/data.tar.gz  | Bin 0 -> 690 bytes
 .../hive_textfile_array_delimiter/run.sh   |  13 +
 .../hive_textfile_nestedarray/create_table.hql |  16 +
 .../hive_textfile_nestedarray/data.tar.gz  | Bin 0 -> 280 bytes
 .../multi_catalog/hive_textfile_nestedarray/run.sh |  13 +
 .../multi_catalog/logs1_parquet/create_table.hql   |  39 +++
 .../data/multi_catalog/logs1_parquet/run.sh|  21 ++
 .../multi_catalog/one_partition/create_table.hql   |  22 ++
 .../data/multi_catalog/one_partition/data.tar.gz   | Bin 0 -> 296 bytes
 .../data/multi_catalog/one_partition/run.sh|  13 +
 .../test_csv_format_error/create_table.hql |  68 
 .../test_csv_format_error/data.tar.gz  | Bin 0 -> 151583 bytes
 .../multi_catalog/test_csv_format_error/run.sh |  13 +
 .../test_date_string_partition/create_table.hql|  25 ++
 .../test_date_string_partition/data.tar.gz | Bin 0 -> 353 bytes
 .../test_date_string_partition/run.sh  |  13 +
 .../multi_catalog/two_partition/create_table.hql   |  25 ++
 .../data/multi_catalog/two_partition/data.tar.gz   | Bin 0 -> 375 bytes
 .../data/multi_catalog/two_partition/run.sh|  13 +
 .../data/statistics/statistics/create_table.hql|  33 ++
 .../scripts/data/statistics/statistics/data.tar.gz | Bin 0 -> 3956 bytes
 .../hive/scripts/data/statistics/statistics/run.sh |  13 +
 .../data/tpch_1000_parquet/part/create_table.hql   |  24 ++
 .../scripts/data/tpch_1000_parquet/part/run.sh |  22 ++
 .../hive/test_hive_default_partition.out   | 174 +++
 .../hive/test_hive_to_array.out|  21 ++
 .../hive/test_text_garbled_file.out| Bin 296830 -> 593565 bytes
 .../hive/test_hive_default_partition.groovy|  17 +-
 .../hive/test_hive_statistic.groovy| 344 +
 .../hive/test_hive_statistic_timeout.groovy|  23 +-
 .../hive/test_hive_to_array.groovy |  17 +-
 .../hive/test_text_garbled_file.groovy |  47 +++
 .../hive/test_hive_statistic.groovy| 338 
 .../hive/test_text_garbled_file.groovy |  46 ---
 40 files changed, 1085 insertions(+), 404 deletions(-)

diff --git a/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl 
b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
index ca0fe2e9ddb..de19ed9de18 100644
--- a/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
+++ b/docker/thirdparties/docker-compose/hive/hive-2x.yaml.tpl
@@ -101,7 +101,7 @@ services:
   - "${PG_PORT}:5432"
 healthcheck:
   test: ["CMD-SHELL", "pg_isready -U postgres"]
-  interval: 5s
+  interval: 10s
   timeout: 60s
   retries: 120
 
diff --git a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl 
b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
index 09d150c17b2..9004f28b61a 100644
--- a/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
+++ b/docker/thirdparties/docker-compose/hive/hive-3x.yaml.tpl
@@ -101,7 +101,7 @@ services:
   - "${PG_PORT}:5432"
 healthcheck:
   test: ["CMD-SHELL", "pg_isready -U postgres"]
-  interval: 5s
+  interval: 10s
   timeout: 60s
   retries: 120
 
diff --git 
a/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/create_table.hql
 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/create_table.hql
new file mode 100644
index 000..6b700396838
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/hive/scripts/data/multi_catalog/hive_textfile_array_all_types/create_table.hql
@@ -0,0 +1,27 @@
+create database if not exists multi_catalog;
+use multi_catalog;
+
+CREATE TABLE IF NOT EXISTS `hive_textfile_array_all_types`(
+  `col1` array,
+  `col2` array,
+  `col3` array,
+  `col4` array,
+  `col5` array,
+  `col6` array,
+  `col7` array,
+  `col8` array,
+  `col9` array,
+  `col10` array,
+  `col11` array

(doris) branch branch-2.1 updated: [bugfix](testcase)add java error log output for 2.1 (#37128)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new 64a79cc03a4 [bugfix](testcase)add java error log output for 2.1 
(#37128)
64a79cc03a4 is described below

commit 64a79cc03a42fec410c80822ba09e024e0d5016c
Author: wuwenchi 
AuthorDate: Tue Jul 2 22:53:38 2024 +0800

[bugfix](testcase)add java error log output for 2.1 (#37128)

pick #35998
---
 .../suites/connector_p0/spark_connector/spark_connector.groovy | 10 --
 .../connector_p0/spark_connector/spark_connector_arrow.groovy  | 10 --
 .../spark_connector/spark_connector_read_type.groovy   | 10 --
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git 
a/regression-test/suites/connector_p0/spark_connector/spark_connector.groovy 
b/regression-test/suites/connector_p0/spark_connector/spark_connector.groovy
index ecd4e6dfc14..2bd618fcc3c 100644
--- a/regression-test/suites/connector_p0/spark_connector/spark_connector.groovy
+++ b/regression-test/suites/connector_p0/spark_connector/spark_connector.groovy
@@ -28,7 +28,13 @@ suite("spark_connector", "connector") {
 logger.info("finish download spark doris demo ...")
 def run_cmd = "java -jar spark-doris-demo.jar 
$context.config.feHttpAddress $context.config.feHttpUser 
regression_test_connector_p0_spark_connector.$tableName"
 logger.info("run_cmd : $run_cmd")
-def run_spark_jar = run_cmd.execute().getText()
-logger.info("result: $run_spark_jar")
+def proc = run_cmd.execute()
+def sout = new StringBuilder()
+def serr = new StringBuilder()
+proc.consumeProcessOutput(sout, serr)
+proc.waitForOrKill(1200_000)
+if (proc.exitValue() != 0) {
+  logger.warn("failed to execute jar: code=${proc.exitValue()}, " + 
"output: ${sout.toString()}, error: ${serr.toString()}")
+}
 qt_select """ select * from $tableName order by order_id"""
 }
diff --git 
a/regression-test/suites/connector_p0/spark_connector/spark_connector_arrow.groovy
 
b/regression-test/suites/connector_p0/spark_connector/spark_connector_arrow.groovy
index 1cd2ed31d2e..a5fbc3b2835 100644
--- 
a/regression-test/suites/connector_p0/spark_connector/spark_connector_arrow.groovy
+++ 
b/regression-test/suites/connector_p0/spark_connector/spark_connector_arrow.groovy
@@ -136,8 +136,14 @@ suite("spark_connector_for_arrow", "connector") {
 logger.info("finish download spark doris demo ...")
 def run_cmd = "java -cp ${jar_name} 
org.apache.doris.spark.testcase.TestStreamLoadForArrowType 
$context.config.feHttpAddress $context.config.feHttpUser 
regression_test_connector_p0_spark_connector"
 logger.info("run_cmd : $run_cmd")
-def run_spark_jar = run_cmd.execute().getText()
-logger.info("result: $run_spark_jar")
+def proc = run_cmd.execute()
+def sout = new StringBuilder()
+def serr = new StringBuilder()
+proc.consumeProcessOutput(sout, serr)
+proc.waitForOrKill(1200_000)
+if (proc.exitValue() != 0) {
+  logger.warn("failed to execute jar: code=${proc.exitValue()}, " + 
"output: ${sout.toString()}, error: ${serr.toString()}")
+}
 
 qt_q01 """ select * from spark_connector_primitive """
 qt_q02 """ select * from spark_connector_array """
diff --git 
a/regression-test/suites/connector_p0/spark_connector/spark_connector_read_type.groovy
 
b/regression-test/suites/connector_p0/spark_connector/spark_connector_read_type.groovy
index 32a3ebf68c7..632e5e3d401 100644
--- 
a/regression-test/suites/connector_p0/spark_connector/spark_connector_read_type.groovy
+++ 
b/regression-test/suites/connector_p0/spark_connector/spark_connector_read_type.groovy
@@ -99,8 +99,14 @@ suite("spark_connector_read_type", "connector") {
 logger.info("finish download spark doris demo ...")
 def run_cmd = "java -jar spark-doris-read.jar 
$context.config.feHttpAddress $context.config.feHttpUser 
regression_test_connector_p0_spark_connector.$tableReadName 
regression_test_connector_p0_spark_connector.$tableWriterName"
 logger.info("run_cmd : $run_cmd")
-def run_spark_jar = run_cmd.execute().getText()
-logger.info("result: $run_spark_jar")
+def proc = run_cmd.execute()
+def sout = new StringBuilder()
+def serr = new StringBuilder()
+proc.consumeProcessOutput(sout, serr)
+proc.waitForOrKill(1200_000)
+if (proc.exitValue() != 0) {
+  logger.warn("failed to execute jar: code=${proc.exitValue()}, " + 
"output: ${sout.toString()}, error: ${serr.toString()}")
+}
 
 qt_select """ select * from $tableWriterName order by id"""
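
The reason for the change: Groovy's `getText()` reads only stdout, so stderr was discarded and a child process that filled its stderr pipe could block forever. `consumeProcessOutput` drains both streams and `waitForOrKill` bounds the run. A plain-JDK sketch of the same capture pattern, with a placeholder command:

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    // Sketch: capture stdout and stderr together, mirroring what the Groovy
    // change above achieves with consumeProcessOutput (the Groovy version also
    // adds waitForOrKill for a hard timeout). The command is a placeholder.
    public class RunJarSketch {
        public static void main(String[] args) throws IOException, InterruptedException {
            ProcessBuilder pb = new ProcessBuilder("java", "-jar", "spark-doris-demo.jar");
            pb.redirectErrorStream(true); // merge stderr into stdout so nothing is lost
            Process proc = pb.start();
            // Reading until EOF also prevents the child from blocking on a full pipe.
            byte[] out = proc.getInputStream().readAllBytes();
            int code = proc.waitFor();
            if (code != 0) {
                System.err.println("failed to execute jar: code=" + code
                        + ", output: " + new String(out, StandardCharsets.UTF_8));
            }
        }
    }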
 


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch branch-2.1 updated: [bugfix](hive)Prevent multiple fs from being generated for 2.1 (#37142)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new b3eaf0e4d2f [bugfix](hive)Prevent multiple fs from being generated for 
2.1 (#37142)
b3eaf0e4d2f is described below

commit b3eaf0e4d2fcc2b6b5a7a05c3095fab6f9086964
Author: wuwenchi 
AuthorDate: Tue Jul 2 22:54:40 2024 +0800

[bugfix](hive)Prevent multiple fs from being generated for 2.1 (#37142)

pick #36954
---
 .../apache/doris/common/security/authentication/HadoopUGI.java | 10 ++
 1 file changed, 10 insertions(+)

diff --git 
a/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java
 
b/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java
index 342f86b7125..db8b9093b07 100644
--- 
a/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java
+++ 
b/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java
@@ -76,6 +76,16 @@ public class HadoopUGI {
 ((SimpleAuthenticationConfig) 
config).setUsername(hadoopUserName);
 LOG.debug(AuthenticationConfig.HADOOP_USER_NAME + " is unset, 
use default user: hadoop");
 }
+
+try {
+ugi = UserGroupInformation.getLoginUser();
+if (ugi.getUserName().equals(hadoopUserName)) {
+return ugi;
+}
+} catch (IOException e) {
+LOG.warn("A SecurityException occurs with simple, do login 
immediately.", e);
+}
+
 ugi = UserGroupInformation.createRemoteUser(hadoopUserName);
 UserGroupInformation.setLoginUser(ugi);
 LOG.debug("Login by proxy user, hadoop.username: {}", 
hadoopUserName);


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch branch-2.1 updated (b3eaf0e4d2f -> 65375b48fcf)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a change to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


from b3eaf0e4d2f [bugfix](hive)Prevent multiple fs from being generated for 
2.1 (#37142)
 add 65375b48fcf [enhance](mtmv)support replace materialized view (#36749) 
(#37147)

No new revisions were added by this update.

Summary of changes:
 .../antlr4/org/apache/doris/nereids/DorisParser.g4 |   1 +
 .../main/java/org/apache/doris/alter/Alter.java|   8 ++
 .../doris/nereids/parser/LogicalPlanBuilder.java   |   6 +
 .../trees/plans/commands/info/AlterMTMVInfo.java   |  10 ++
 .../plans/commands/info/AlterMTMVRenameInfo.java   |   1 +
 .../plans/commands/info/AlterMTMVReplaceInfo.java  | 101 ++
 .../data/mtmv_p0/test_multi_level_rename_mtmv.out  |   4 +
 .../data/mtmv_p0/test_multi_level_replace_mtmv.out |   4 +
 regression-test/data/mtmv_p0/test_replace_mtmv.out |  25 
 .../mtmv_p0/test_multi_level_rename_mtmv.groovy|  88 
 .../mtmv_p0/test_multi_level_replace_mtmv.groovy   |  98 +
 .../suites/mtmv_p0/test_replace_mtmv.groovy| 155 +
 12 files changed, 501 insertions(+)
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterMTMVReplaceInfo.java
 create mode 100644 
regression-test/data/mtmv_p0/test_multi_level_rename_mtmv.out
 create mode 100644 
regression-test/data/mtmv_p0/test_multi_level_replace_mtmv.out
 create mode 100644 regression-test/data/mtmv_p0/test_replace_mtmv.out
 create mode 100644 
regression-test/suites/mtmv_p0/test_multi_level_rename_mtmv.groovy
 create mode 100644 
regression-test/suites/mtmv_p0/test_multi_level_replace_mtmv.groovy
 create mode 100644 regression-test/suites/mtmv_p0/test_replace_mtmv.groovy


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch branch-2.1 updated (65375b48fcf -> 6716f9e7363)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a change to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


from 65375b48fcf [enhance](mtmv)support replace materialized view (#36749) 
(#37147)
 add 6716f9e7363 [fix](mtmv)fix MTMVTask log has null pointer (#36840) 
(#37131)

No new revisions were added by this update.

Summary of changes:
 .../main/java/org/apache/doris/job/task/AbstractTask.java   | 13 +
 .../src/test/java/org/apache/doris/mtmv/MTMVTest.java   |  4 ++--
 2 files changed, 15 insertions(+), 2 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch branch-2.1 updated (6716f9e7363 -> 177764647df)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a change to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


from 6716f9e7363 [fix](mtmv)fix MTMVTask log has null pointer (#36840) 
(#37131)
 add 177764647df [fix](mtmv)fix mtmv dead lock (#37009) (#37133)

No new revisions were added by this update.

Summary of changes:
 fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch branch-2.0 updated: [fix](auth)ldap set passwd need forward to master (#36436) (#37130)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
 new 5fefc5785c6 [fix](auth)ldap set passwd need forward to master (#36436) 
(#37130)
5fefc5785c6 is described below

commit 5fefc5785c68a55cfd1dae2e142c3c9a507475d4
Author: zhangdong <493738...@qq.com>
AuthorDate: Tue Jul 2 23:02:53 2024 +0800

[fix](auth)ldap set passwd need forward to master (#36436) (#37130)

pick: https://github.com/apache/doris/pull/36436
---
 fe/fe-core/src/main/java/org/apache/doris/analysis/SetStmt.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SetStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SetStmt.java
index 5e0d0f9105c..3c6d938026f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SetStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SetStmt.java
@@ -91,7 +91,7 @@ public class SetStmt extends StatementBase {
 public RedirectStatus getRedirectStatus() {
 if (setVars != null) {
 for (SetVar var : setVars) {
-if (var instanceof SetPassVar) {
+if (var instanceof SetPassVar || var instanceof 
SetLdapPassVar) {
 return RedirectStatus.FORWARD_WITH_SYNC;
 } else if (var.getType() == SetType.GLOBAL) {
 return RedirectStatus.FORWARD_WITH_SYNC;


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch branch-2.1 updated (177764647df -> 0a1abf10d6b)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a change to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


from 177764647df [fix](mtmv)fix mtmv dead lock (#37009) (#37133)
 add 0a1abf10d6b [fix](auth)support check priv when tvf use resource 
(#36928) (#37132)

No new revisions were added by this update.

Summary of changes:
 .../org/apache/doris/analysis/AlterPolicyStmt.java |  1 +
 .../apache/doris/analysis/CreatePolicyStmt.java|  1 +
 .../java/org/apache/doris/common/ErrorCode.java|  4 ++
 .../nereids/rules/rewrite/CheckPrivileges.java |  9 
 .../functions/table/TableValuedFunction.java   |  5 +++
 .../ExternalFileTableValuedFunction.java   | 17 
 .../doris/tablefunction/TableValuedFunctionIf.java |  5 +++
 .../tvf/test_s3_tvf_with_resource.groovy   | 48 ++
 8 files changed, 90 insertions(+)


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch branch-2.1 updated: [enhance](mtmv) not allow modify data of MTMV (#35870) (#37129)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new fbca3196c50 [enhance](mtmv) not allow modify data of MTMV (#35870) 
(#37129)
fbca3196c50 is described below

commit fbca3196c501a13351b5eb79118c47c2648ec2e7
Author: zhangdong <493738...@qq.com>
AuthorDate: Tue Jul 2 23:06:45 2024 +0800

[enhance](mtmv) not allow modify data of MTMV (#35870) (#37129)

pick: https://github.com/apache/doris/pull/35870
---
 .../doris/analysis/InsertOverwriteTableStmt.java   | 13 
 .../apache/doris/datasource/InternalCatalog.java   |  4 ++
 .../java/org/apache/doris/mtmv/MTMVPlanUtil.java   |  1 +
 .../main/java/org/apache/doris/mtmv/MTMVUtil.java  | 25 +++
 .../insert/InsertOverwriteTableCommand.java|  5 ++
 .../java/org/apache/doris/qe/SessionVariable.java  | 16 +
 .../doris/transaction/DatabaseTransactionMgr.java  |  2 +
 .../suites/mtmv_p0/test_modify_data_mtmv.groovy| 83 ++
 8 files changed, 149 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/InsertOverwriteTableStmt.java
 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/InsertOverwriteTableStmt.java
index 24713eed5c2..cfe030c428a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/InsertOverwriteTableStmt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/InsertOverwriteTableStmt.java
@@ -18,10 +18,14 @@
 package org.apache.doris.analysis;
 
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.MTMV;
+import org.apache.doris.catalog.TableIf;
+import org.apache.doris.common.DdlException;
 import org.apache.doris.common.ErrorCode;
 import org.apache.doris.common.ErrorReport;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.InternalDatabaseUtil;
+import org.apache.doris.mtmv.MTMVUtil;
 import org.apache.doris.mysql.privilege.PrivPredicate;
 import org.apache.doris.qe.ConnectContext;
 
@@ -62,6 +66,10 @@ public class InsertOverwriteTableStmt extends DdlStmt {
 return target.getTblName().getTbl();
 }
 
+public String getCtl() {
+return target.getTblName().getCtl();
+}
+
 public QueryStmt getQueryStmt() {
 return source.getQueryStmt();
 }
@@ -84,6 +92,11 @@ public class InsertOverwriteTableStmt extends DdlStmt {
 public void analyze(Analyzer analyzer) throws UserException {
 target.getTblName().analyze(analyzer);
 InternalDatabaseUtil.checkDatabase(getDb(), ConnectContext.get());
+TableIf tableIf = 
Env.getCurrentEnv().getCatalogMgr().getCatalogOrAnalysisException(getCtl())
+
.getDbOrAnalysisException(getDb()).getTableOrAnalysisException(getTbl());
+if (tableIf instanceof MTMV && 
!MTMVUtil.allowModifyMTMVData(ConnectContext.get())) {
+throw new DdlException("Not allowed to perform current operation 
on async materialized view");
+}
 if (!Env.getCurrentEnv().getAccessManager()
 .checkTblPriv(ConnectContext.get(), 
target.getTblName().getCtl(), getDb(), getTbl(),
 PrivPredicate.LOAD)) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
index b6c0c73eae2..e10f13a7ea6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
@@ -136,6 +136,7 @@ import org.apache.doris.common.util.TimeUtils;
 import org.apache.doris.common.util.Util;
 import org.apache.doris.datasource.es.EsRepository;
 import org.apache.doris.event.DropPartitionEvent;
+import org.apache.doris.mtmv.MTMVUtil;
 import org.apache.doris.nereids.trees.plans.commands.info.DropMTMVInfo;
 import org.apache.doris.nereids.trees.plans.commands.info.TableNameInfo;
 import org.apache.doris.persist.AlterDatabasePropertyInfo;
@@ -3090,6 +3091,9 @@ public class InternalCatalog implements 
CatalogIf {
 OlapTable olapTable = db.getOlapTableOrDdlException(dbTbl.getTbl());
 
 long rowsToTruncate = 0;
+if (olapTable instanceof MTMV && 
!MTMVUtil.allowModifyMTMVData(ConnectContext.get())) {
+throw new DdlException("Not allowed to perform current operation 
on async materialized view");
+}
 
 BinlogConfig binlogConfig;
 olapTable.readLock();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java
index e74ca1f8aff..e1ad35aa758 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java
@@ -58,6 +58,7 @@ public class MTMVPlanUtil {
 ctx.setTh
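
The email is cut off in MTMVPlanUtil, but the two guarded sites above show the pattern: every mutating path consults an escape hatch before touching an async materialized view. A reduced sketch of that gate, where `skipCheck` stands in for whatever MTMVUtil.allowModifyMTMVData() reads from the session (the real check and exception type differ):

    // Reduced sketch of the MTMV write guard added above; not Doris code.
    public class MtmvWriteGuardSketch {
        static void checkCanModify(boolean tableIsMtmv, boolean skipCheck) {
            if (tableIsMtmv && !skipCheck) {
                throw new IllegalStateException(
                        "Not allowed to perform current operation on async materialized view");
            }
        }

        public static void main(String[] args) {
            checkCanModify(false, false); // plain table: ok
            checkCanModify(true, true);   // MTMV refresh path with the flag set: ok
            checkCanModify(true, false);  // MTMV hit by user DML: throws
        }
    }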

(doris) branch branch-2.1 updated: [fix](mtmv)mtmv support default key (#36221) (#36601)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
 new 5bb6642b867 [fix](mtmv)mtmv support default key (#36221) (#36601)
5bb6642b867 is described below

commit 5bb6642b867ef74851d53fbd13772240155c38bd
Author: zhangdong <493738...@qq.com>
AuthorDate: Tue Jul 2 23:08:38 2024 +0800

[fix](mtmv)mtmv support default key (#36221) (#36601)

pick : https://github.com/apache/doris/pull/36221
---
 .../trees/plans/commands/info/CreateMTMVInfo.java  | 42 ++
 regression-test/data/mtmv_p0/test_build_mtmv.out   |  2 +-
 .../suites/mtmv_p0/test_build_mtmv.groovy  |  5 ---
 3 files changed, 43 insertions(+), 6 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java
index ffacc401e27..bad9acd7468 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java
@@ -30,8 +30,10 @@ import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.KeysType;
 import org.apache.doris.catalog.PartitionType;
 import org.apache.doris.catalog.TableIf;
+import org.apache.doris.catalog.Type;
 import org.apache.doris.catalog.View;
 import org.apache.doris.common.ErrorCode;
+import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.FeNameFormat;
 import org.apache.doris.common.util.DynamicPartitionUtil;
 import org.apache.doris.common.util.PropertyAnalyzer;
@@ -63,6 +65,7 @@ import 
org.apache.doris.nereids.trees.plans.logical.LogicalSink;
 import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias;
 import 
org.apache.doris.nereids.trees.plans.visitor.NondeterministicFunctionCollector;
 import org.apache.doris.nereids.types.AggStateType;
+import org.apache.doris.nereids.types.DataType;
 import org.apache.doris.nereids.util.Utils;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.SessionVariable;
@@ -218,11 +221,50 @@ public class CreateMTMVInfo {
 }
 getRelation(planner);
 getColumns(plan);
+analyzeKeys();
 this.mvPartitionInfo = mvPartitionDefinition
 .analyzeAndTransferToMTMVPartitionInfo(planner, ctx, 
logicalQuery);
 this.partitionDesc = generatePartitionDesc(ctx);
 }
 
+private void analyzeKeys() {
+boolean enableDuplicateWithoutKeysByDefault = false;
+try {
+if (properties != null) {
+enableDuplicateWithoutKeysByDefault =
+
PropertyAnalyzer.analyzeEnableDuplicateWithoutKeysByDefault(properties);
+}
+} catch (Exception e) {
+throw new AnalysisException(e.getMessage(), e.getCause());
+}
+if (keys.isEmpty() && !enableDuplicateWithoutKeysByDefault) {
+keys = Lists.newArrayList();
+int keyLength = 0;
+for (ColumnDefinition column : columns) {
+DataType type = column.getType();
+Type catalogType = column.getType().toCatalogDataType();
+keyLength += catalogType.getIndexSize();
+if (keys.size() >= FeConstants.shortkey_max_column_count
+|| keyLength > FeConstants.shortkey_maxsize_bytes) {
+if (keys.isEmpty() && type.isStringLikeType()) {
+keys.add(column.getName());
+}
+break;
+}
+if (type.isFloatLikeType() || type.isStringType() || 
type.isJsonType()
+|| catalogType.isComplexType() || type.isBitmapType() 
|| type.isHllType()
+|| type.isQuantileStateType() || type.isJsonType() || 
type.isStructType()
+|| column.getAggType() != null) {
+break;
+}
+keys.add(column.getName());
+if (type.isVarcharType()) {
+break;
+}
+}
+}
+}
+
 private void getRelation(NereidsPlanner planner) {
 // Should not make table without data to empty relation when analyze 
the related table,
 // so add disable rules
diff --git a/regression-test/data/mtmv_p0/test_build_mtmv.out 
b/regression-test/data/mtmv_p0/test_build_mtmv.out
index db69c393748..5e5632511f4 100644
--- a/regression-test/data/mtmv_p0/test_build_mtmv.out
+++ b/regression-test/data/mtmv_p0/test_build_mtmv.out
@@ -61,7 +61,7 @@ zhangsang 200
 11 111
 
 -- !desc_mv --
-field_1VARCHAR(16) No  false   \N  NONE
+field_1VARCHAR(16
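
The key-selection loop in analyzeKeys() above mirrors how the FE picks default duplicate keys: walk the columns in order, stop at float/string/JSON/complex/aggregated columns, respect the short-key count and byte limits, and let a VARCHAR be a key only as the last one. A compact restatement of that rule with hypothetical column sizes (the real limits come from FeConstants, and the edge case for an oversized leading string column is omitted):

    import java.util.ArrayList;
    import java.util.List;

    // Simplified restatement of the default-key rule; limits and type flags
    // are illustrative stand-ins, not the FE's actual constants.
    public class DefaultKeySketch {
        record Col(String name, int indexSize, boolean keyable, boolean isVarchar) {}

        static final int MAX_KEY_COLUMNS = 3;
        static final int MAX_KEY_BYTES = 36;

        static List<String> pickKeys(List<Col> columns) {
            List<String> keys = new ArrayList<>();
            int keyLength = 0;
            for (Col c : columns) {
                keyLength += c.indexSize();
                if (keys.size() >= MAX_KEY_COLUMNS || keyLength > MAX_KEY_BYTES) {
                    break;
                }
                if (!c.keyable()) { // float/string/json/complex/agg columns end the prefix
                    break;
                }
                keys.add(c.name());
                if (c.isVarchar()) { // a VARCHAR may be a key, but must be the last one
                    break;
                }
            }
            return keys;
        }

        public static void main(String[] args) {
            List<Col> cols = List.of(
                    new Col("k1", 4, true, false),      // INT
                    new Col("k2", 20, true, true),      // VARCHAR(16)
                    new Col("v1", 8, false, false));    // DOUBLE, never a key
            System.out.println(pickKeys(cols)); // [k1, k2]
        }
    }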

(doris) branch branch-2.1 updated (5bb6642b867 -> 03942f23f60)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a change to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


from 5bb6642b867 [fix](mtmv)mtmv support default key (#36221) (#36601)
 add 03942f23f60 [fix](mtmv)fix when related table drop partition,mv 
partition is sync… (#36602)

No new revisions were added by this update.

Summary of changes:
 .../java/org/apache/doris/mtmv/MTMVPartitionUtil.java  |  5 +
 .../org/apache/doris/mtmv/MTMVRefreshSnapshot.java |  9 +
 .../org/apache/doris/mtmv/MTMVPartitionUtilTest.java   | 18 ++
 3 files changed, 32 insertions(+)


-
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org



(doris) branch master updated: [test](migrate) move 2 cases from p2 to p0 (#36935)

2024-07-02 Thread morningman
This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
 new b3b035e3227 [test](migrate) move 2 cases from p2 to p0 (#36935)
b3b035e3227 is described below

commit b3b035e3227b6766a5abf6bbf8b29b9c9b27a195
Author: zhangdong <493738...@qq.com>
AuthorDate: Tue Jul 2 23:25:35 2024 +0800

[test](migrate) move 2 cases from p2 to p0 (#36935)

1. Extend the health check interval of the hive docker containers
2. Move the following cases:
- test_hive_remove_partition
- test_hive_statistics_from_hms
- test_hive_partition_column_analyze
- test_hive_parquet_alter_column
- test_hive_analyze_db
---
 .../docker-compose/hive/hive-2x.yaml.tpl   |2 +-
 .../docker-compose/hive/hive-3x.yaml.tpl   |2 +-
 .../create_table.hql   |   30 +
 .../parquet_alter_column_to_bigint/data.tar.gz |  Bin 0 -> 3714 bytes
 .../parquet_alter_column_to_bigint/run.sh  |   12 +
 .../create_table.hql   |   30 +
 .../parquet_alter_column_to_boolean/data.tar.gz|  Bin 0 -> 3718 bytes
 .../parquet_alter_column_to_boolean/run.sh |   12 +
 .../parquet_alter_column_to_char/create_table.hql  |   30 +
 .../parquet_alter_column_to_char/data.tar.gz   |  Bin 0 -> 3717 bytes
 .../parquet_alter_column_to_char/run.sh|   12 +
 .../parquet_alter_column_to_date/create_table.hql  |   30 +
 .../parquet_alter_column_to_date/data.tar.gz   |  Bin 0 -> 3718 bytes
 .../parquet_alter_column_to_date/run.sh|   12 +
 .../create_table.hql   |   30 +
 .../parquet_alter_column_to_decimal/data.tar.gz|  Bin 0 -> 3718 bytes
 .../parquet_alter_column_to_decimal/run.sh |   12 +
 .../create_table.hql   |   30 +
 .../parquet_alter_column_to_double/data.tar.gz |  Bin 0 -> 3713 bytes
 .../parquet_alter_column_to_double/run.sh  |   12 +
 .../parquet_alter_column_to_float/create_table.hql |   30 +
 .../parquet_alter_column_to_float/data.tar.gz  |  Bin 0 -> 3716 bytes
 .../parquet_alter_column_to_float/run.sh   |   12 +
 .../parquet_alter_column_to_int/create_table.hql   |   30 +
 .../parquet_alter_column_to_int/data.tar.gz|  Bin 0 -> 3715 bytes
 .../parquet_alter_column_to_int/run.sh |   12 +
 .../create_table.hql   |   30 +
 .../parquet_alter_column_to_smallint/data.tar.gz   |  Bin 0 -> 3718 bytes
 .../parquet_alter_column_to_smallint/run.sh|   12 +
 .../create_table.hql   |   30 +
 .../parquet_alter_column_to_string/data.tar.gz |  Bin 0 -> 3716 bytes
 .../parquet_alter_column_to_string/run.sh  |   12 +
 .../create_table.hql   |   30 +
 .../parquet_alter_column_to_timestamp/data.tar.gz  |  Bin 0 -> 3716 bytes
 .../parquet_alter_column_to_timestamp/run.sh   |   12 +
 .../create_table.hql   |   30 +
 .../parquet_alter_column_to_tinyint/data.tar.gz|  Bin 0 -> 3716 bytes
 .../parquet_alter_column_to_tinyint/run.sh |   12 +
 .../create_table.hql   |   30 +
 .../parquet_alter_column_to_varchar/data.tar.gz|  Bin 0 -> 3717 bytes
 .../parquet_alter_column_to_varchar/run.sh |   12 +
 .../partition_manual_remove/create_table.hql   |   24 +
 .../partition_manual_remove/data.tar.gz|  Bin 0 -> 1026 bytes
 .../multi_catalog/partition_manual_remove/run.sh   |   13 +
 .../bigint_partition/create_table.hql  |   21 +
 .../partition_type/bigint_partition/data.tar.gz|  Bin 0 -> 258064 bytes
 .../data/partition_type/bigint_partition/run.sh|   12 +
 .../partition_type/char_partition/create_table.hql |   21 +
 .../data/partition_type/char_partition/data.tar.gz |  Bin 0 -> 258066 bytes
 .../data/partition_type/char_partition/run.sh  |   12 +
 .../partition_type/date_partition/create_table.hql |   21 +
 .../data/partition_type/date_partition/data.tar.gz |  Bin 0 -> 257868 bytes
 .../data/partition_type/date_partition/run.sh  |   12 +
 .../decimal_partition/create_table.hql |   21 +
 .../partition_type/decimal_partition/data.tar.gz   |  Bin 0 -> 258769 bytes
 .../data/partition_type/decimal_partition/run.sh   |   12 +
 .../double_partition/create_table.hql  |   21 +
 .../partition_type/double_partition/data.tar.gz|  Bin 0 -> 258850 bytes
 .../data/partition_type/double_partition/run.sh|   12 +
 .../float_partition/create_table.hql   |   21 +
 .../partition_type/float_partition/data.tar.gz |  Bin 0 -> 258843 bytes
 .../data/partition_type/float_partition/run.sh |   12 +
 .../partition_type/int_partition/create_table.hql  |   21 +
 .../data/partition_type/int_partition/data.tar.gz  |  Bin 0 -> 257936 bytes
 
