This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 6be7a62 [Config] Add ignore config to determine whether to continue to start be when load tablet from header failed. (#3632) 6be7a62 is described below commit 6be7a6232fd52cba9c9ac32e054d3ffc76210eb0 Author: yangzhg <780531...@qq.com> AuthorDate: Wed May 20 09:40:50 2020 +0800 [Config] Add ignore config to determine whether to continue to start be when load tablet from header failed. (#3632) Add config ignore_load_tablet_failure to determine whether to continue to start be when load tablet from header failed. --- be/src/common/config.h | 23 ++++++++++++---------- be/src/olap/data_dir.cpp | 7 ++++++- docs/en/administrator-guide/config/be_config.md | 5 +++++ docs/zh-CN/administrator-guide/config/be_config.md | 5 +++++ thirdparty/vars.sh | 2 +- 5 files changed, 30 insertions(+), 12 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index ce6a360..b93b72c 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -34,7 +34,7 @@ namespace config { // Note that there should at most one ip match this list. // this is a list in semicolon-delimited format, in CIDR notation, e.g. 10.10.10.0/24 // If no ip match this rule, will choose one randomly. - CONF_String(priority_networks, "") + CONF_String(priority_networks, ""); //// //// tcmalloc gc parameter @@ -221,7 +221,7 @@ namespace config { // 仅仅是建议值,当磁盘空间不足时,trash下的文件保存期可不遵守这个参数 CONF_mInt32(trash_file_expire_time_sec, "259200"); // check row nums for BE/CE and schema change. true is open, false is closed. - CONF_mBool(row_nums_check, "true") + CONF_mBool(row_nums_check, "true"); //file descriptors cache, by default, cache 32768 descriptors CONF_Int32(file_descriptor_cache_capacity, "32768"); // minimum file descriptor number @@ -259,7 +259,7 @@ namespace config { // if compaction of a tablet failed, this tablet should not be chosen to // compaction until this interval passes. 
- CONF_mInt64(min_compaction_failure_interval_sec, "600") // 10 min + CONF_mInt64(min_compaction_failure_interval_sec, "600"); // 10 min // Too many compaction tasks may run out of memory. // This config is to limit the max concurrency of running compaction tasks. // -1 means no limit, and the max concurrency will be: @@ -345,11 +345,11 @@ namespace config { CONF_Bool(enable_quadratic_probing, "false"); // for pprof - CONF_String(pprof_profile_dir, "${DORIS_HOME}/log") + CONF_String(pprof_profile_dir, "${DORIS_HOME}/log"); // for partition // CONF_Bool(enable_partitioned_hash_join, "false") - CONF_Bool(enable_partitioned_aggregation, "true") + CONF_Bool(enable_partitioned_aggregation, "true"); // to forward compatibility, will be removed later CONF_mBool(enable_token_check, "true"); @@ -478,7 +478,7 @@ namespace config { // The percent of max used capacity of a data dir CONF_mInt32(storage_flood_stage_usage_percent, "95"); // 95% // The min bytes that should be left of a data dir - CONF_mInt64(storage_flood_stage_left_capacity_bytes, "1073741824") // 1GB + CONF_mInt64(storage_flood_stage_left_capacity_bytes, "1073741824"); // 1GB // number of thread for flushing memtable per store CONF_Int32(flush_thread_num_per_store, "2"); @@ -491,9 +491,9 @@ namespace config { CONF_String(default_rowset_type, "ALPHA"); // Maximum size of a single message body in all protocols - CONF_Int64(brpc_max_body_size, "209715200") + CONF_Int64(brpc_max_body_size, "209715200"); // Max unwritten bytes in each socket, if the limit is reached, Socket.Write fails with EOVERCROWDED - CONF_Int64(brpc_socket_max_unwritten_bytes, "67108864") + CONF_Int64(brpc_socket_max_unwritten_bytes, "67108864"); // max number of txns for every txn_partition_map in txn manager // this is a self protection to avoid too many txns saving in manager @@ -503,7 +503,7 @@ namespace config { // this is a an enhancement for better performance to manage tablet CONF_Int32(tablet_map_shard_size, "1"); - 
CONF_String(plugin_path, "${DORIS_HOME}/plugin") + CONF_String(plugin_path, "${DORIS_HOME}/plugin"); // txn_map_lock shard size, the value is 2^n, n=0,1,2,3,4 // this is a an enhancement for better performance to manage txn @@ -511,7 +511,10 @@ namespace config { // txn_lock shard size, the value is 2^n, n=0,1,2,3,4 // this is a an enhancement for better performance to commit and publish txn - CONF_Int32(txn_shard_size, "1024") + CONF_Int32(txn_shard_size, "1024"); + + // Whether to continue to start be when load tablet from header failed. + CONF_Bool(ignore_load_tablet_failure, "false"); } // namespace config diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index fbcab3b..3d32302 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -694,19 +694,24 @@ OLAPStatus DataDir::load() { // create tablet from tablet meta and add it to tablet mgr LOG(INFO) << "begin loading tablet from meta"; std::set<int64_t> tablet_ids; - auto load_tablet_func = [this, &tablet_ids](int64_t tablet_id, int32_t schema_hash, + std::set<int64_t> failed_tablet_ids; + auto load_tablet_func = [this, &tablet_ids, &failed_tablet_ids](int64_t tablet_id, int32_t schema_hash, const std::string& value) -> bool { OLAPStatus status = _tablet_manager->load_tablet_from_meta(this, tablet_id, schema_hash, value, false, false); if (status != OLAP_SUCCESS) { LOG(WARNING) << "load tablet from header failed. status:" << status << ", tablet=" << tablet_id << "." 
<< schema_hash; + failed_tablet_ids.insert(tablet_id); } else { tablet_ids.insert(tablet_id); } return true; }; OLAPStatus load_tablet_status = TabletMetaManager::traverse_headers(_meta, load_tablet_func); + if (failed_tablet_ids.size() != 0 && !config::ignore_load_tablet_failure) { + LOG(FATAL) << "load tablets from header failed, failed tablets size: " << failed_tablet_ids.size(); + } if (load_tablet_status != OLAP_SUCCESS) { LOG(WARNING) << "there is failure when loading tablet headers, path:" << _path; } else { diff --git a/docs/en/administrator-guide/config/be_config.md b/docs/en/administrator-guide/config/be_config.md index 89fe9fb..b037b1d 100644 --- a/docs/en/administrator-guide/config/be_config.md +++ b/docs/en/administrator-guide/config/be_config.md @@ -398,3 +398,8 @@ Since this is a brpc configuration, users can also modify this parameter directl ### webserver_port ### write_buffer_size + +### ignore_load_tablet_failure +* Type: boolean +* Description: Whether to continue to start be when load tablet from header failed. +* Default: false diff --git a/docs/zh-CN/administrator-guide/config/be_config.md b/docs/zh-CN/administrator-guide/config/be_config.md index 3cc73b6..02dbd1e 100644 --- a/docs/zh-CN/administrator-guide/config/be_config.md +++ b/docs/zh-CN/administrator-guide/config/be_config.md @@ -396,3 +396,8 @@ under the License. 
### `webserver_port` ### `write_buffer_size` + +### ignore_load_tablet_failure +* 类型:布尔 +* 描述:用来决定在有tablet 加载失败的情况下是否忽略错误,继续启动be +* 默认值: false \ No newline at end of file diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh index 29c2a7b..36e64f5 100644 --- a/thirdparty/vars.sh +++ b/thirdparty/vars.sh @@ -262,7 +262,7 @@ CROARINGBITMAP_MD5SUM="29602918e6890ffdeed84cb171857046" # ORC ORC_DOWNLOAD="https://github.com/apache/orc/archive/rel/release-1.5.8.tar.gz" ORC_NAME=orc-1.5.8.tar.gz -ORC_SOURCE=orc-1.5.8 +ORC_SOURCE=orc-rel-release-1.5.8 ORC_MD5SUM="49eb9ce94060b26d4bc3595b2f1efb4c" # jemalloc --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org