This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit 8d17e5a50cc99a44e2a26f2cf59fe86cbbc5e471 Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Tue Jul 4 20:35:35 2023 +0800 [enchanment](udf) add more info when download jar package failed (#21440) when download jar package, some times show the checksum is not equal, but the root reason is unknown, now add some error msg if failed. --- be/src/runtime/user_function_cache.cpp | 40 +++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/be/src/runtime/user_function_cache.cpp b/be/src/runtime/user_function_cache.cpp index 693a4672de..7c0dacc824 100644 --- a/be/src/runtime/user_function_cache.cpp +++ b/be/src/runtime/user_function_cache.cpp @@ -18,6 +18,7 @@ #include "runtime/user_function_cache.h" #include <atomic> +#include <cstdint> #include <regex> #include <vector> @@ -49,6 +50,15 @@ struct UserFunctionCacheEntry { // If unref() returns true, this object should be delete bool unref() { return _refs.fetch_sub(1) == 1; } + std::string debug_string() { + fmt::memory_buffer error_msg; + fmt::format_to(error_msg, + " the info of UserFunctionCacheEntry save in BE, function_id:{}, " + "checksum:{}, lib_file:{}, is_downloaded:{}. ", + function_id, checksum, lib_file, is_downloaded); + return fmt::to_string(error_msg); + } + int64_t function_id = 0; // used to check if this library is valid. std::string checksum; @@ -136,12 +146,17 @@ Status UserFunctionCache::_load_entry_from_lib(const std::string& dir, const std } else if (ends_with(file, ".jar")) { lib_type = LibType::JAR; } else { - return Status::InternalError("unknown library file format: " + file); + return Status::InternalError( + "unknown library file format. the file type is not end with xxx.jar or xxx.so : " + + file); } std::vector<std::string> split_parts = strings::Split(file, "."); if (split_parts.size() != 3) { - return Status::InternalError("user function's name should be function_id.checksum.so"); + return Status::InternalError( + "user function's name should be function_id.checksum[.file_name].file_type, now " + "the all split parts are by delimiter(.): " + + file); } int64_t function_id = std::stol(split_parts[0]); std::string checksum = split_parts[1]; @@ -149,7 +164,7 @@ Status UserFunctionCache::_load_entry_from_lib(const std::string& dir, const std if (it != _entry_map.end()) { LOG(WARNING) << "meet a same function id user function library, function_id=" << function_id << ", one_checksum=" << checksum - << ", other_checksum=" << it->second->checksum; + << ", other_checksum info: = " << it->second->debug_string(); return Status::InternalError("duplicate function id"); } // create a cache entry and put it into entry map @@ -268,7 +283,7 @@ Status UserFunctionCache::_get_cache_entry(int64_t fid, const std::string& url, } auto st = _load_cache_entry(url, entry); if (!st.ok()) { - LOG(WARNING) << "fail to load cache entry, fid=" << fid; + LOG(WARNING) << "fail to load cache entry, fid=" << fid << " " << file_name << " " << url; // if we load a cache entry failed, I think we should delete this entry cache // even if this cache was valid before. _destroy_cache_entry(entry); @@ -332,10 +347,13 @@ Status UserFunctionCache::_download_lib(const std::string& url, UserFunctionCach Md5Digest digest; HttpClient client; + int64_t file_size = 0; RETURN_IF_ERROR(client.init(real_url)); Status status; - auto download_cb = [&status, &tmp_file, &fp, &digest](const void* data, size_t length) { + auto download_cb = [&status, &tmp_file, &fp, &digest, &file_size](const void* data, + size_t length) { digest.update(data, length); + file_size = file_size + length; auto res = fwrite(data, length, 1, fp.get()); if (res != 1) { LOG(WARNING) << "fail to write data to file, file=" << tmp_file @@ -349,9 +367,15 @@ Status UserFunctionCache::_download_lib(const std::string& url, UserFunctionCach RETURN_IF_ERROR(status); digest.digest(); if (!iequal(digest.hex(), entry->checksum)) { - LOG(WARNING) << "UDF's checksum is not equal, one=" << digest.hex() - << ", other=" << entry->checksum; - return Status::InternalError("UDF's library checksum is not match"); + fmt::memory_buffer error_msg; + fmt::format_to( + error_msg, + " The checksum is not equal of {} ({}). The init info of first create entry is:" + "{} But download file check_sum is: {}, file_size is: {}.", + url, real_url, entry->debug_string(), digest.hex(), file_size); + std::string error(fmt::to_string(error_msg)); + LOG(WARNING) << error; + return Status::InternalError(error); } // close this file fp.reset(); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org