This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new df30de085f9 [fix](kerberos) fix BE kerberos ccache renew, optimize kerberos options (#29291)
df30de085f9 is described below

commit df30de085f9d97856d96fb2170e0f81f9bdc5bf2
Author: slothever <18522955+w...@users.noreply.github.com>
AuthorDate: Sat Jan 13 00:08:33 2024 +0800

    [fix](kerberos) fix BE kerberos ccache renew, optimize kerberos options (#29291)

    1. Remove the BE-side kinit and log in via JNI with the keytab instead, because
       kinit cannot renew the TGT for Doris in many complex cases.
       > The thirdparty pull request https://github.com/apache/doris-thirdparty/pull/173
       > adds support for creating a new instance from a keytab, so the kinit command is
       > no longer needed; we just log in with the keytab and principal.
    2. Add `kerberos_ccache_path` to set the kerberos credentials cache path manually.
    3. Add `max_hdfs_file_handle_cache_time_sec` to set the hdfs fs handle cache time.
---
 be/src/common/config.cpp          |  4 ++-
 be/src/common/config.h            | 10 +++---
 be/src/io/fs/hdfs_file_system.cpp | 17 +++++------
 be/src/io/fs/hdfs_file_system.h   | 10 ++----
 be/src/io/hdfs_builder.cpp        | 64 ++++++++++++++------------------------
 be/src/io/hdfs_builder.h          |  6 ++--
 bin/start_be.sh                   | 28 ++++++++---------
 7 files changed, 59 insertions(+), 80 deletions(-)
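
For reference, here is a minimal be.conf sketch wiring up the options this commit
touches. The ccache path value is illustrative only; the other two values match the
new defaults added in config.cpp:

    # Reuse an externally managed Kerberos credentials cache (illustrative path;
    # empty by default, in which case the BE logs in from the keytab instead).
    kerberos_ccache_path = /tmp/krb5cc_doris
    # krb5.conf used for the keytab login ("/etc/krb5.conf" by default).
    kerberos_krb5_conf_path = /etc/krb5.conf
    # Lifetime of a cached hdfs file handle, in seconds (3600 by default).
    max_hdfs_file_handle_cache_time_sec = 3600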
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 49eb046e7ae..17ac1905dd6 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1060,6 +1060,7 @@ DEFINE_Bool(enable_feature_binlog, "false");
 DEFINE_Bool(enable_set_in_bitmap_value, "false");
 
 DEFINE_Int64(max_hdfs_file_handle_cache_num, "20000");
+DEFINE_Int32(max_hdfs_file_handle_cache_time_sec, "3600");
 DEFINE_Int64(max_external_file_meta_cache_num, "20000");
 // Apply delete pred in cumu compaction
 DEFINE_mBool(enable_delete_when_cumu_compaction, "false");
@@ -1068,7 +1069,8 @@ DEFINE_mBool(enable_delete_when_cumu_compaction, "false");
 DEFINE_Int32(rocksdb_max_write_buffer_number, "5");
 
 DEFINE_Bool(allow_invalid_decimalv2_literal, "false");
-DEFINE_mInt64(kerberos_expiration_time_seconds, "43200");
+DEFINE_mString(kerberos_ccache_path, "");
+DEFINE_mString(kerberos_krb5_conf_path, "/etc/krb5.conf");
 
 DEFINE_mString(get_stack_trace_tool, "libunwind");
 DEFINE_mString(dwarf_location_info_mode, "FAST");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index e9a5d38fe6a..0f7c47f8ec3 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1099,6 +1099,8 @@ DECLARE_Bool(enable_set_in_bitmap_value);
 
 // max number of hdfs file handle in cache
 DECLARE_Int64(max_hdfs_file_handle_cache_num);
+DECLARE_Int32(max_hdfs_file_handle_cache_time_sec);
+
 // max number of meta info of external files, such as parquet footer
 DECLARE_Int64(max_external_file_meta_cache_num);
 // Apply delete pred in cumu compaction
@@ -1109,10 +1111,10 @@ DECLARE_Int32(rocksdb_max_write_buffer_number);
 
 // Allow invalid decimalv2 literal for compatible with old version. Recommend set it false strongly.
 DECLARE_mBool(allow_invalid_decimalv2_literal);
-// the max expiration time of kerberos ticket.
-// If a hdfs filesytem with kerberos authentication live longer
-// than this time, it will be expired.
-DECLARE_mInt64(kerberos_expiration_time_seconds);
+// Allow to specify kerberos credentials cache path.
+DECLARE_mString(kerberos_ccache_path);
+// set krb5.conf path, use "/etc/krb5.conf" by default
+DECLARE_mString(kerberos_krb5_conf_path);
 
 // Values include `none`, `glog`, `boost`, `glibc`, `libunwind`
 DECLARE_mString(get_stack_trace_tool);
diff --git a/be/src/io/fs/hdfs_file_system.cpp b/be/src/io/fs/hdfs_file_system.cpp
index 7e2fda45fe5..8ada4b92acc 100644
--- a/be/src/io/fs/hdfs_file_system.cpp
+++ b/be/src/io/fs/hdfs_file_system.cpp
@@ -80,8 +80,7 @@ private:
     HdfsFileSystemCache() = default;
 
     uint64 _hdfs_hash_code(const THdfsParams& hdfs_params, const std::string& fs_name);
-    Status _create_fs(const THdfsParams& hdfs_params, const std::string& fs_name, hdfsFS* fs,
-                      bool* is_kerberos);
+    Status _create_fs(const THdfsParams& hdfs_params, const std::string& fs_name, hdfsFS* fs);
     void _clean_invalid();
     void _clean_oldest();
 };
@@ -105,7 +104,9 @@ public:
 private:
     FileHandleCache _cache;
 
-    HdfsFileHandleCache() : _cache(config::max_hdfs_file_handle_cache_num, 16, 3600 * 1000L) {}
+    HdfsFileHandleCache()
+            : _cache(config::max_hdfs_file_handle_cache_num, 16,
+                     config::max_hdfs_file_handle_cache_time_sec * 1000L) {};
 };
 
 Status HdfsFileHandleCache::get_file(const std::shared_ptr<HdfsFileSystem>& fs, const Path& file,
@@ -390,10 +391,9 @@ HdfsFileSystemHandle* HdfsFileSystem::get_handle() {
 int HdfsFileSystemCache::MAX_CACHE_HANDLE = 64;
 
 Status HdfsFileSystemCache::_create_fs(const THdfsParams& hdfs_params, const std::string& fs_name,
-                                       hdfsFS* fs, bool* is_kerberos) {
+                                       hdfsFS* fs) {
     HDFSCommonBuilder builder;
     RETURN_IF_ERROR(create_hdfs_builder(hdfs_params, fs_name, &builder));
-    *is_kerberos = builder.is_need_kinit();
     hdfsFS hdfs_fs = hdfsBuilderConnect(builder.get());
     if (hdfs_fs == nullptr) {
         return Status::IOError("faield to connect to hdfs {}: {}", fs_name, hdfs_error());
@@ -448,20 +448,19 @@ Status HdfsFileSystemCache::get_connection(const THdfsParams& hdfs_params,
         // not find in cache, or fs handle is invalid
         // create a new one and try to put it into cache
         hdfsFS hdfs_fs = nullptr;
-        bool is_kerberos = false;
-        RETURN_IF_ERROR(_create_fs(hdfs_params, fs_name, &hdfs_fs, &is_kerberos));
+        RETURN_IF_ERROR(_create_fs(hdfs_params, fs_name, &hdfs_fs));
         if (_cache.size() >= MAX_CACHE_HANDLE) {
             _clean_invalid();
             _clean_oldest();
         }
         if (_cache.size() < MAX_CACHE_HANDLE) {
             std::unique_ptr<HdfsFileSystemHandle> handle =
-                    std::make_unique<HdfsFileSystemHandle>(hdfs_fs, true, is_kerberos);
+                    std::make_unique<HdfsFileSystemHandle>(hdfs_fs, true);
             handle->inc_ref();
             *fs_handle = handle.get();
             _cache[hash_code] = std::move(handle);
         } else {
-            *fs_handle = new HdfsFileSystemHandle(hdfs_fs, false, is_kerberos);
+            *fs_handle = new HdfsFileSystemHandle(hdfs_fs, false);
         }
     }
     return Status::OK();
diff --git a/be/src/io/fs/hdfs_file_system.h b/be/src/io/fs/hdfs_file_system.h
index db87ddfdd96..16a5b39bab8 100644
--- a/be/src/io/fs/hdfs_file_system.h
+++ b/be/src/io/fs/hdfs_file_system.h
@@ -42,10 +42,9 @@ struct FileInfo;
 
 class HdfsFileSystemHandle {
 public:
-    HdfsFileSystemHandle(hdfsFS fs, bool cached, bool is_kerberos)
+    HdfsFileSystemHandle(hdfsFS fs, bool cached)
             : hdfs_fs(fs),
               from_cache(cached),
-              _is_kerberos(is_kerberos),
               _ref_cnt(0),
               _create_time(_now()),
               _last_access_time(0),
@@ -75,11 +74,7 @@ public:
 
     int ref_cnt() { return _ref_cnt; }
 
-    bool invalid() {
-        return _invalid ||
-               (_is_kerberos &&
-                _now() - _create_time.load() > config::kerberos_expiration_time_seconds * 1000 / 2);
-    }
+    bool invalid() { return _invalid; }
 
     void set_invalid() { _invalid = true; }
 
@@ -89,7 +84,6 @@ public:
     const bool from_cache;
 
 private:
-    const bool _is_kerberos;
     // the number of referenced client
     std::atomic<int> _ref_cnt;
     // For kerberos authentication, we need to save create time so that
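
The hunks above replace the kerberos-specific half-life expiry with a single,
configurable TTL on cached handles. A minimal sketch of that pattern in isolation
(simplified types, not the actual Doris FileHandleCache/HdfsFileSystemCache classes):

    #include <chrono>
    #include <memory>
    #include <mutex>
    #include <string>
    #include <unordered_map>

    // One cached connection: a creation timestamp, a ref count, and an explicit
    // invalid flag (set on I/O errors) -- no kerberos special case anymore.
    struct CachedHandle {
        std::chrono::steady_clock::time_point create_time =
                std::chrono::steady_clock::now();
        int ref_cnt = 0;
        bool invalid = false;
    };

    class HandleCache {
    public:
        // `ttl` plays the role of config::max_hdfs_file_handle_cache_time_sec.
        explicit HandleCache(std::chrono::seconds ttl) : _ttl(ttl) {}

        // Returns a live handle for `key`, or nullptr if absent, invalid or expired.
        CachedHandle* get(const std::string& key) {
            std::lock_guard<std::mutex> lock(_mu);
            auto it = _cache.find(key);
            if (it == _cache.end()) {
                return nullptr;
            }
            const bool expired =
                    std::chrono::steady_clock::now() - it->second->create_time > _ttl;
            if (it->second->invalid || expired) {
                // Drop only unreferenced entries; live users keep their pointer.
                if (it->second->ref_cnt == 0) {
                    _cache.erase(it);
                }
                return nullptr;
            }
            ++it->second->ref_cnt;
            return it->second.get();
        }

    private:
        const std::chrono::seconds _ttl;
        std::mutex _mu;
        std::unordered_map<std::string, std::unique_ptr<CachedHandle>> _cache;
    };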
diff --git a/be/src/io/hdfs_builder.cpp b/be/src/io/hdfs_builder.cpp
index 41cac611de7..945ef3ab02b 100644
--- a/be/src/io/hdfs_builder.cpp
+++ b/be/src/io/hdfs_builder.cpp
@@ -45,35 +45,19 @@ Status HDFSCommonBuilder::init_hdfs_builder() {
     return Status::OK();
 }
 
-Status HDFSCommonBuilder::run_kinit() {
+Status HDFSCommonBuilder::check_krb_params() {
+    std::string ticket_path = doris::config::kerberos_ccache_path;
+    if (!ticket_path.empty()) {
+        hdfsBuilderConfSetStr(hdfs_builder, "hadoop.security.kerberos.ticket.cache.path",
+                              ticket_path.c_str());
+        return Status::OK();
+    }
+    // we should check hdfs_kerberos_principal and hdfs_kerberos_keytab nonnull to login kdc.
     if (hdfs_kerberos_principal.empty() || hdfs_kerberos_keytab.empty()) {
         return Status::InvalidArgument("Invalid hdfs_kerberos_principal or hdfs_kerberos_keytab");
     }
-    std::string ticket_path = TICKET_CACHE_PATH + generate_uuid_string();
-    const char* krb_home = getenv("KRB_HOME");
-    std::string krb_home_str(krb_home ? krb_home : "");
-    fmt::memory_buffer kinit_command;
-    if (krb_home_str.empty()) {
-        fmt::format_to(kinit_command, "kinit -c {} -R -t {} -k {}", ticket_path,
-                       hdfs_kerberos_keytab, hdfs_kerberos_principal);
-    } else {
-        // Assign kerberos home in env, get kinit in kerberos home
-        fmt::format_to(kinit_command, krb_home_str + "/bin/kinit -c {} -R -t {} -k {}", ticket_path,
-                       hdfs_kerberos_keytab, hdfs_kerberos_principal);
-    }
-    VLOG_NOTICE << "kinit command: " << fmt::to_string(kinit_command);
-    std::string msg;
-    AgentUtils util;
-    bool rc = util.exec_cmd(fmt::to_string(kinit_command), &msg);
-    if (!rc) {
-        return Status::InternalError("Kinit failed, errMsg: " + msg);
-    }
-#ifdef USE_LIBHDFS3
-    hdfsBuilderSetPrincipal(hdfs_builder, hdfs_kerberos_principal.c_str());
-#endif
-    hdfsBuilderConfSetStr(hdfs_builder, "hadoop.security.kerberos.ticket.cache.path",
-                          ticket_path.c_str());
-    LOG(INFO) << "finished to run kinit command: " << fmt::to_string(kinit_command);
+    // enable auto-renew thread
+    hdfsBuilderConfSetStr(hdfs_builder, "hadoop.kerberos.keytab.login.autorenewal.enabled", "true");
     return Status::OK();
 }
 
@@ -114,22 +98,23 @@ Status create_hdfs_builder(const THdfsParams& hdfsParams, const std::string& fs_
     RETURN_IF_ERROR(builder->init_hdfs_builder());
     hdfsBuilderSetNameNode(builder->get(), fs_name.c_str());
     // set kerberos conf
+    if (hdfsParams.__isset.hdfs_kerberos_keytab) {
+        builder->kerberos_login = true;
+        builder->hdfs_kerberos_keytab = hdfsParams.hdfs_kerberos_keytab;
+#ifdef USE_HADOOP_HDFS
+        hdfsBuilderSetKerb5Conf(builder->get(), doris::config::kerberos_krb5_conf_path.c_str());
+        hdfsBuilderSetKeyTabFile(builder->get(), hdfsParams.hdfs_kerberos_keytab.c_str());
+#endif
+    }
     if (hdfsParams.__isset.hdfs_kerberos_principal) {
-        builder->need_kinit = true;
+        builder->kerberos_login = true;
         builder->hdfs_kerberos_principal = hdfsParams.hdfs_kerberos_principal;
-        hdfsBuilderSetUserName(builder->get(), hdfsParams.hdfs_kerberos_principal.c_str());
+        hdfsBuilderSetPrincipal(builder->get(), hdfsParams.hdfs_kerberos_principal.c_str());
    } else if (hdfsParams.__isset.user) {
         hdfsBuilderSetUserName(builder->get(), hdfsParams.user.c_str());
 #ifdef USE_HADOOP_HDFS
         hdfsBuilderSetKerb5Conf(builder->get(), nullptr);
         hdfsBuilderSetKeyTabFile(builder->get(), nullptr);
-#endif
-    }
-    if (hdfsParams.__isset.hdfs_kerberos_keytab) {
-        builder->need_kinit = true;
-        builder->hdfs_kerberos_keytab = hdfsParams.hdfs_kerberos_keytab;
-#ifdef USE_HADOOP_HDFS
-        hdfsBuilderSetKeyTabFile(builder->get(), hdfsParams.hdfs_kerberos_keytab.c_str());
 #endif
     }
     // set other conf
@@ -145,13 +130,10 @@ Status create_hdfs_builder(const THdfsParams& hdfsParams, const std::string& fs_
 #endif
         }
     }
-
-    hdfsBuilderConfSetStr(builder->get(), "ipc.client.fallback-to-simple-auth-allowed", "true");
-
-    if (builder->is_need_kinit()) {
-        RETURN_IF_ERROR(builder->run_kinit());
+    if (builder->is_kerberos()) {
+        RETURN_IF_ERROR(builder->check_krb_params());
     }
-
+    hdfsBuilderConfSetStr(builder->get(), "ipc.client.fallback-to-simple-auth-allowed", "true");
     return Status::OK();
 }
 
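
Net effect of the hdfs_builder.cpp changes: check_krb_params() now prefers an
externally managed ticket cache and otherwise relies on the keytab login (with the
JVM-side auto-renewal thread) configured in create_hdfs_builder(). A condensed
sketch of that decision flow, assuming the doris-thirdparty libhdfs that provides
hdfsBuilderSetPrincipal/hdfsBuilderSetKerb5Conf/hdfsBuilderSetKeyTabFile; the
standalone helper and its header path are illustrative, not code from this patch:

    #include <hdfs/hdfs.h> // libhdfs; header location varies by distribution

    hdfsFS connect_with_kerberos(const char* fs_name, const char* principal,
                                 const char* keytab, const char* ccache_path) {
        hdfsBuilder* builder = hdfsNewBuilder();
        hdfsBuilderSetNameNode(builder, fs_name);
        hdfsBuilderSetPrincipal(builder, principal);
        if (ccache_path != nullptr && ccache_path[0] != '\0') {
            // kerberos_ccache_path is set: reuse the externally renewed cache.
            hdfsBuilderConfSetStr(builder, "hadoop.security.kerberos.ticket.cache.path",
                                  ccache_path);
        } else {
            // No cache given: log in from the keytab and let the login thread
            // renew the TGT instead of shelling out to kinit.
            hdfsBuilderSetKerb5Conf(builder, "/etc/krb5.conf");
            hdfsBuilderSetKeyTabFile(builder, keytab);
            hdfsBuilderConfSetStr(
                    builder, "hadoop.kerberos.keytab.login.autorenewal.enabled", "true");
        }
        hdfsBuilderConfSetStr(builder, "ipc.client.fallback-to-simple-auth-allowed", "true");
        return hdfsBuilderConnect(builder); // nullptr on failure
    }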
diff --git a/be/src/io/hdfs_builder.h b/be/src/io/hdfs_builder.h
index 159544fc589..125f3bed82b 100644
--- a/be/src/io/hdfs_builder.h
+++ b/be/src/io/hdfs_builder.h
@@ -56,12 +56,12 @@ public:
 
     Status init_hdfs_builder();
     hdfsBuilder* get() { return hdfs_builder; }
-    bool is_need_kinit() const { return need_kinit; }
-    Status run_kinit();
+    bool is_kerberos() const { return kerberos_login; }
+    Status check_krb_params();
 
 private:
     hdfsBuilder* hdfs_builder = nullptr;
-    bool need_kinit {false};
+    bool kerberos_login {false};
     std::string hdfs_kerberos_keytab;
     std::string hdfs_kerberos_principal;
 };
diff --git a/bin/start_be.sh b/bin/start_be.sh
index 434c06cfe34..fb1ebfdd9dc 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -88,20 +88,6 @@ if [[ "${MAX_FILE_COUNT}" -lt 60000 ]]; then
     exit 1
 fi
 
-# add java libs
-preload_jars=("preload-extensions")
-preload_jars+=("java-udf")
-
-for preload_jar_dir in "${preload_jars[@]}"; do
-    for f in "${DORIS_HOME}/lib/java_extensions/${preload_jar_dir}"/*.jar; do
-        if [[ -z "${DORIS_CLASSPATH}" ]]; then
-            export DORIS_CLASSPATH="${f}"
-        else
-            export DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-        fi
-    done
-done
-
 if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
     # add hadoop libs
     for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
@@ -118,6 +104,20 @@ if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
     done
 fi
 
+# add java libs
+preload_jars=("preload-extensions")
+preload_jars+=("java-udf")
+
+for preload_jar_dir in "${preload_jars[@]}"; do
+    for f in "${DORIS_HOME}/lib/java_extensions/${preload_jar_dir}"/*.jar; do
+        if [[ -z "${DORIS_CLASSPATH}" ]]; then
+            export DORIS_CLASSPATH="${f}"
+        else
+            export DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+        fi
+    done
+done
+
 # add custom_libs to CLASSPATH
 if [[ -d "${DORIS_HOME}/custom_lib" ]]; then
     for f in "${DORIS_HOME}/custom_lib"/*.jar; do

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org