This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new df30de085f9 [fix](kerberos) fix BE kerberos ccache renew, optimize kerberos options (#29291)
df30de085f9 is described below

commit df30de085f9d97856d96fb2170e0f81f9bdc5bf2
Author: slothever <18522955+w...@users.noreply.github.com>
AuthorDate: Sat Jan 13 00:08:33 2024 +0800

    [fix](kerberos) fix BE kerberos ccache renew, optimize kerberos options (#29291)
    
    1. We need to remove the BE-side kinit and use JNI login with a keytab, because kinit cannot renew the TGT for Doris in many complex cases.
    > This pull request adds support for creating a new instance from a keytab: https://github.com/apache/doris-thirdparty/pull/173, so the kinit command is no longer needed; we simply log in with the keytab and principal.
    
    2. Add `kerberos_ccache_path` to set the Kerberos credentials cache path manually.
    
    3. Add `max_hdfs_file_handle_cache_time_sec` to set the HDFS file handle cache expiration time (an example be.conf sketch follows this list).
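    
    As an illustration only (not part of this commit), here is a minimal be.conf sketch showing how the new BE options might be set, assuming the usual `key = value` format of be.conf; the values are example assumptions, not shipped defaults except where noted:
    
        # Optional: point BE at an externally managed Kerberos ticket cache.
        # When set, BE uses this cache directly instead of logging in via keytab.
        kerberos_ccache_path = /tmp/krb5cc_doris
        # Path to krb5.conf; the code defaults to /etc/krb5.conf.
        kerberos_krb5_conf_path = /etc/krb5.conf
        # Expire cached HDFS file handles after one hour (3600 seconds is the default).
        max_hdfs_file_handle_cache_time_sec = 3600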
---
 be/src/common/config.cpp          |  4 ++-
 be/src/common/config.h            | 10 +++---
 be/src/io/fs/hdfs_file_system.cpp | 17 +++++------
 be/src/io/fs/hdfs_file_system.h   | 10 ++----
 be/src/io/hdfs_builder.cpp        | 64 ++++++++++++++-------------------------
 be/src/io/hdfs_builder.h          |  6 ++--
 bin/start_be.sh                   | 28 ++++++++---------
 7 files changed, 59 insertions(+), 80 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 49eb046e7ae..17ac1905dd6 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1060,6 +1060,7 @@ DEFINE_Bool(enable_feature_binlog, "false");
 DEFINE_Bool(enable_set_in_bitmap_value, "false");
 
 DEFINE_Int64(max_hdfs_file_handle_cache_num, "20000");
+DEFINE_Int32(max_hdfs_file_handle_cache_time_sec, "3600");
 DEFINE_Int64(max_external_file_meta_cache_num, "20000");
 // Apply delete pred in cumu compaction
 DEFINE_mBool(enable_delete_when_cumu_compaction, "false");
@@ -1068,7 +1069,8 @@ DEFINE_mBool(enable_delete_when_cumu_compaction, "false");
 DEFINE_Int32(rocksdb_max_write_buffer_number, "5");
 
 DEFINE_Bool(allow_invalid_decimalv2_literal, "false");
-DEFINE_mInt64(kerberos_expiration_time_seconds, "43200");
+DEFINE_mString(kerberos_ccache_path, "");
+DEFINE_mString(kerberos_krb5_conf_path, "/etc/krb5.conf");
 
 DEFINE_mString(get_stack_trace_tool, "libunwind");
 DEFINE_mString(dwarf_location_info_mode, "FAST");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index e9a5d38fe6a..0f7c47f8ec3 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1099,6 +1099,8 @@ DECLARE_Bool(enable_set_in_bitmap_value);
 
 // max number of hdfs file handle in cache
 DECLARE_Int64(max_hdfs_file_handle_cache_num);
+DECLARE_Int32(max_hdfs_file_handle_cache_time_sec);
+
 // max number of meta info of external files, such as parquet footer
 DECLARE_Int64(max_external_file_meta_cache_num);
 // Apply delete pred in cumu compaction
@@ -1109,10 +1111,10 @@ DECLARE_Int32(rocksdb_max_write_buffer_number);
 
 // Allow invalid decimalv2 literal for compatible with old version. Recommend set it false strongly.
 DECLARE_mBool(allow_invalid_decimalv2_literal);
-// the max expiration time of kerberos ticket.
-// If a hdfs filesytem with kerberos authentication live longer
-// than this time, it will be expired.
-DECLARE_mInt64(kerberos_expiration_time_seconds);
+// Allow to specify kerberos credentials cache path.
+DECLARE_mString(kerberos_ccache_path);
+// set krb5.conf path, use "/etc/krb5.conf" by default
+DECLARE_mString(kerberos_krb5_conf_path);
 
 // Values include `none`, `glog`, `boost`, `glibc`, `libunwind`
 DECLARE_mString(get_stack_trace_tool);
diff --git a/be/src/io/fs/hdfs_file_system.cpp b/be/src/io/fs/hdfs_file_system.cpp
index 7e2fda45fe5..8ada4b92acc 100644
--- a/be/src/io/fs/hdfs_file_system.cpp
+++ b/be/src/io/fs/hdfs_file_system.cpp
@@ -80,8 +80,7 @@ private:
     HdfsFileSystemCache() = default;
 
     uint64 _hdfs_hash_code(const THdfsParams& hdfs_params, const std::string& fs_name);
-    Status _create_fs(const THdfsParams& hdfs_params, const std::string& fs_name, hdfsFS* fs,
-                      bool* is_kerberos);
+    Status _create_fs(const THdfsParams& hdfs_params, const std::string& fs_name, hdfsFS* fs);
     void _clean_invalid();
     void _clean_oldest();
 };
@@ -105,7 +104,9 @@ public:
 
 private:
     FileHandleCache _cache;
-    HdfsFileHandleCache() : _cache(config::max_hdfs_file_handle_cache_num, 16, 3600 * 1000L) {}
+    HdfsFileHandleCache()
+            : _cache(config::max_hdfs_file_handle_cache_num, 16,
+                     config::max_hdfs_file_handle_cache_time_sec * 1000L) {};
 };
 
 Status HdfsFileHandleCache::get_file(const std::shared_ptr<HdfsFileSystem>& fs, const Path& file,
@@ -390,10 +391,9 @@ HdfsFileSystemHandle* HdfsFileSystem::get_handle() {
 int HdfsFileSystemCache::MAX_CACHE_HANDLE = 64;
 
 Status HdfsFileSystemCache::_create_fs(const THdfsParams& hdfs_params, const std::string& fs_name,
-                                       hdfsFS* fs, bool* is_kerberos) {
+                                       hdfsFS* fs) {
     HDFSCommonBuilder builder;
     RETURN_IF_ERROR(create_hdfs_builder(hdfs_params, fs_name, &builder));
-    *is_kerberos = builder.is_need_kinit();
     hdfsFS hdfs_fs = hdfsBuilderConnect(builder.get());
     if (hdfs_fs == nullptr) {
         return Status::IOError("faield to connect to hdfs {}: {}", fs_name, hdfs_error());
@@ -448,20 +448,19 @@ Status HdfsFileSystemCache::get_connection(const THdfsParams& hdfs_params,
         // not find in cache, or fs handle is invalid
         // create a new one and try to put it into cache
         hdfsFS hdfs_fs = nullptr;
-        bool is_kerberos = false;
-        RETURN_IF_ERROR(_create_fs(hdfs_params, fs_name, &hdfs_fs, &is_kerberos));
+        RETURN_IF_ERROR(_create_fs(hdfs_params, fs_name, &hdfs_fs));
         if (_cache.size() >= MAX_CACHE_HANDLE) {
             _clean_invalid();
             _clean_oldest();
         }
         if (_cache.size() < MAX_CACHE_HANDLE) {
             std::unique_ptr<HdfsFileSystemHandle> handle =
-                    std::make_unique<HdfsFileSystemHandle>(hdfs_fs, true, is_kerberos);
+                    std::make_unique<HdfsFileSystemHandle>(hdfs_fs, true);
             handle->inc_ref();
             *fs_handle = handle.get();
             _cache[hash_code] = std::move(handle);
         } else {
-            *fs_handle = new HdfsFileSystemHandle(hdfs_fs, false, is_kerberos);
+            *fs_handle = new HdfsFileSystemHandle(hdfs_fs, false);
         }
     }
     return Status::OK();
diff --git a/be/src/io/fs/hdfs_file_system.h b/be/src/io/fs/hdfs_file_system.h
index db87ddfdd96..16a5b39bab8 100644
--- a/be/src/io/fs/hdfs_file_system.h
+++ b/be/src/io/fs/hdfs_file_system.h
@@ -42,10 +42,9 @@ struct FileInfo;
 
 class HdfsFileSystemHandle {
 public:
-    HdfsFileSystemHandle(hdfsFS fs, bool cached, bool is_kerberos)
+    HdfsFileSystemHandle(hdfsFS fs, bool cached)
             : hdfs_fs(fs),
               from_cache(cached),
-              _is_kerberos(is_kerberos),
               _ref_cnt(0),
               _create_time(_now()),
               _last_access_time(0),
@@ -75,11 +74,7 @@ public:
 
     int ref_cnt() { return _ref_cnt; }
 
-    bool invalid() {
-        return _invalid ||
-               (_is_kerberos &&
-                _now() - _create_time.load() > config::kerberos_expiration_time_seconds * 1000 / 2);
-    }
+    bool invalid() { return _invalid; }
 
     void set_invalid() { _invalid = true; }
 
@@ -89,7 +84,6 @@ public:
     const bool from_cache;
 
 private:
-    const bool _is_kerberos;
     // the number of referenced client
     std::atomic<int> _ref_cnt;
     // For kerberos authentication, we need to save create time so that
diff --git a/be/src/io/hdfs_builder.cpp b/be/src/io/hdfs_builder.cpp
index 41cac611de7..945ef3ab02b 100644
--- a/be/src/io/hdfs_builder.cpp
+++ b/be/src/io/hdfs_builder.cpp
@@ -45,35 +45,19 @@ Status HDFSCommonBuilder::init_hdfs_builder() {
     return Status::OK();
 }
 
-Status HDFSCommonBuilder::run_kinit() {
+Status HDFSCommonBuilder::check_krb_params() {
+    std::string ticket_path = doris::config::kerberos_ccache_path;
+    if (!ticket_path.empty()) {
+        hdfsBuilderConfSetStr(hdfs_builder, "hadoop.security.kerberos.ticket.cache.path",
+                              ticket_path.c_str());
+        return Status::OK();
+    }
+    // we should check hdfs_kerberos_principal and hdfs_kerberos_keytab nonnull to login kdc.
     if (hdfs_kerberos_principal.empty() || hdfs_kerberos_keytab.empty()) {
         return Status::InvalidArgument("Invalid hdfs_kerberos_principal or hdfs_kerberos_keytab");
     }
-    std::string ticket_path = TICKET_CACHE_PATH + generate_uuid_string();
-    const char* krb_home = getenv("KRB_HOME");
-    std::string krb_home_str(krb_home ? krb_home : "");
-    fmt::memory_buffer kinit_command;
-    if (krb_home_str.empty()) {
-        fmt::format_to(kinit_command, "kinit -c {} -R -t {} -k {}", ticket_path,
-                       hdfs_kerberos_keytab, hdfs_kerberos_principal);
-    } else {
-        // Assign kerberos home in env, get kinit in kerberos home
-        fmt::format_to(kinit_command, krb_home_str + "/bin/kinit -c {} -R -t {} -k {}", ticket_path,
-                       hdfs_kerberos_keytab, hdfs_kerberos_principal);
-    }
-    VLOG_NOTICE << "kinit command: " << fmt::to_string(kinit_command);
-    std::string msg;
-    AgentUtils util;
-    bool rc = util.exec_cmd(fmt::to_string(kinit_command), &msg);
-    if (!rc) {
-        return Status::InternalError("Kinit failed, errMsg: " + msg);
-    }
-#ifdef USE_LIBHDFS3
-    hdfsBuilderSetPrincipal(hdfs_builder, hdfs_kerberos_principal.c_str());
-#endif
-    hdfsBuilderConfSetStr(hdfs_builder, "hadoop.security.kerberos.ticket.cache.path",
-                          ticket_path.c_str());
-    LOG(INFO) << "finished to run kinit command: " << fmt::to_string(kinit_command);
+    // enable auto-renew thread
+    hdfsBuilderConfSetStr(hdfs_builder, "hadoop.kerberos.keytab.login.autorenewal.enabled", "true");
     return Status::OK();
 }
 
@@ -114,22 +98,23 @@ Status create_hdfs_builder(const THdfsParams& hdfsParams, const std::string& fs_
     RETURN_IF_ERROR(builder->init_hdfs_builder());
     hdfsBuilderSetNameNode(builder->get(), fs_name.c_str());
     // set kerberos conf
+    if (hdfsParams.__isset.hdfs_kerberos_keytab) {
+        builder->kerberos_login = true;
+        builder->hdfs_kerberos_keytab = hdfsParams.hdfs_kerberos_keytab;
+#ifdef USE_HADOOP_HDFS
+        hdfsBuilderSetKerb5Conf(builder->get(), doris::config::kerberos_krb5_conf_path.c_str());
+        hdfsBuilderSetKeyTabFile(builder->get(), hdfsParams.hdfs_kerberos_keytab.c_str());
+#endif
+    }
     if (hdfsParams.__isset.hdfs_kerberos_principal) {
-        builder->need_kinit = true;
+        builder->kerberos_login = true;
         builder->hdfs_kerberos_principal = hdfsParams.hdfs_kerberos_principal;
-        hdfsBuilderSetUserName(builder->get(), hdfsParams.hdfs_kerberos_principal.c_str());
+        hdfsBuilderSetPrincipal(builder->get(), hdfsParams.hdfs_kerberos_principal.c_str());
     } else if (hdfsParams.__isset.user) {
         hdfsBuilderSetUserName(builder->get(), hdfsParams.user.c_str());
 #ifdef USE_HADOOP_HDFS
         hdfsBuilderSetKerb5Conf(builder->get(), nullptr);
         hdfsBuilderSetKeyTabFile(builder->get(), nullptr);
-#endif
-    }
-    if (hdfsParams.__isset.hdfs_kerberos_keytab) {
-        builder->need_kinit = true;
-        builder->hdfs_kerberos_keytab = hdfsParams.hdfs_kerberos_keytab;
-#ifdef USE_HADOOP_HDFS
-        hdfsBuilderSetKeyTabFile(builder->get(), hdfsParams.hdfs_kerberos_keytab.c_str());
 #endif
     }
     // set other conf
@@ -145,13 +130,10 @@ Status create_hdfs_builder(const THdfsParams& hdfsParams, const std::string& fs_
 #endif
         }
     }
-
-    hdfsBuilderConfSetStr(builder->get(), "ipc.client.fallback-to-simple-auth-allowed", "true");
-
-    if (builder->is_need_kinit()) {
-        RETURN_IF_ERROR(builder->run_kinit());
+    if (builder->is_kerberos()) {
+        RETURN_IF_ERROR(builder->check_krb_params());
     }
-
+    hdfsBuilderConfSetStr(builder->get(), "ipc.client.fallback-to-simple-auth-allowed", "true");
     return Status::OK();
 }
 
diff --git a/be/src/io/hdfs_builder.h b/be/src/io/hdfs_builder.h
index 159544fc589..125f3bed82b 100644
--- a/be/src/io/hdfs_builder.h
+++ b/be/src/io/hdfs_builder.h
@@ -56,12 +56,12 @@ public:
     Status init_hdfs_builder();
 
     hdfsBuilder* get() { return hdfs_builder; }
-    bool is_need_kinit() const { return need_kinit; }
-    Status run_kinit();
+    bool is_kerberos() const { return kerberos_login; }
+    Status check_krb_params();
 
 private:
     hdfsBuilder* hdfs_builder = nullptr;
-    bool need_kinit {false};
+    bool kerberos_login {false};
     std::string hdfs_kerberos_keytab;
     std::string hdfs_kerberos_principal;
 };
diff --git a/bin/start_be.sh b/bin/start_be.sh
index 434c06cfe34..fb1ebfdd9dc 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -88,20 +88,6 @@ if [[ "${MAX_FILE_COUNT}" -lt 60000 ]]; then
     exit 1
 fi
 
-# add java libs
-preload_jars=("preload-extensions")
-preload_jars+=("java-udf")
-
-for preload_jar_dir in "${preload_jars[@]}"; do
-    for f in "${DORIS_HOME}/lib/java_extensions/${preload_jar_dir}"/*.jar; do
-        if [[ -z "${DORIS_CLASSPATH}" ]]; then
-            export DORIS_CLASSPATH="${f}"
-        else
-            export DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
-        fi
-    done
-done
-
 if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
     # add hadoop libs
     for f in "${DORIS_HOME}/lib/hadoop_hdfs/common"/*.jar; do
@@ -118,6 +104,20 @@ if [[ -d "${DORIS_HOME}/lib/hadoop_hdfs/" ]]; then
     done
 fi
 
+# add java libs
+preload_jars=("preload-extensions")
+preload_jars+=("java-udf")
+
+for preload_jar_dir in "${preload_jars[@]}"; do
+    for f in "${DORIS_HOME}/lib/java_extensions/${preload_jar_dir}"/*.jar; do
+        if [[ -z "${DORIS_CLASSPATH}" ]]; then
+            export DORIS_CLASSPATH="${f}"
+        else
+            export DORIS_CLASSPATH="${DORIS_CLASSPATH}:${f}"
+        fi
+    done
+done
+
 # add custom_libs to CLASSPATH
 if [[ -d "${DORIS_HOME}/custom_lib" ]]; then
     for f in "${DORIS_HOME}/custom_lib"/*.jar; do

