This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 585662b37af [improvement](http) Support to acquire md5 digest of the file to download (#35807) 585662b37af is described below commit 585662b37af9c77ee0caf6bdf4bec7c81d44366e Author: walter <w41te...@gmail.com> AuthorDate: Tue Jun 4 10:05:00 2024 +0800 [improvement](http) Support to acquire md5 digest of the file to download (#35807) --- be/src/http/http_client.cpp | 46 ++++++++++++++++++- be/src/http/http_client.h | 5 +- be/src/http/utils.cpp | 37 +++++++++------ be/src/http/utils.h | 3 +- be/test/http/http_client_test.cpp | 96 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 169 insertions(+), 18 deletions(-) diff --git a/be/src/http/http_client.cpp b/be/src/http/http_client.cpp index d7a6c9c9665..b8ef9834341 100644 --- a/be/src/http/http_client.cpp +++ b/be/src/http/http_client.cpp @@ -24,12 +24,36 @@ #include <ostream> #include "common/config.h" +#include "http/http_headers.h" #include "http/http_status.h" #include "util/stack_util.h" namespace doris { -HttpClient::HttpClient() {} +static const char* header_error_msg(CURLHcode code) { + switch (code) { + case CURLHE_OK: + return "OK"; + case CURLHE_BADINDEX: + return "header exists but not with this index "; + case CURLHE_MISSING: + return "no such header exists"; + case CURLHE_NOHEADERS: + return "no headers at all exist (yet)"; + case CURLHE_NOREQUEST: + return "no request with this number was used"; + case CURLHE_OUT_OF_MEMORY: + return "out of memory while processing"; + case CURLHE_BAD_ARGUMENT: + return "a function argument was not okay"; + case CURLHE_NOT_BUILT_IN: + return "curl_easy_header() was disabled in the build"; + default: + return "unknown"; + } +} + +HttpClient::HttpClient() = default; HttpClient::~HttpClient() { if (_curl != nullptr) { @@ -88,7 +112,7 @@ Status HttpClient::init(const std::string& url) { } curl_write_callback callback = [](char* buffer, size_t size, size_t nmemb, void* param) { - HttpClient* client = (HttpClient*)param; + auto* client = (HttpClient*)param; return client->on_response_data(buffer, size * nmemb); }; @@ -177,6 +201,24 @@ Status HttpClient::execute(const std::function<bool(const void* data, size_t len return Status::OK(); } +Status HttpClient::get_content_md5(std::string* md5) const { + struct curl_header* header_ptr; + auto code = curl_easy_header(_curl, HttpHeaders::CONTENT_MD5, 0, CURLH_HEADER, 0, &header_ptr); + if (code == CURLHE_MISSING || code == CURLHE_NOHEADERS) { + // no such headers exists + md5->clear(); + return Status::OK(); + } else if (code != CURLHE_OK) { + auto msg = fmt::format("failed to get http header {}: {} ({})", HttpHeaders::CONTENT_MD5, + header_error_msg(code), code); + LOG(WARNING) << msg << ", trace=" << get_stack_trace(); + return Status::HttpError(std::move(msg)); + } + + *md5 = header_ptr->value; + return Status::OK(); +} + Status HttpClient::download(const std::string& local_path) { // set method to GET set_method(GET); diff --git a/be/src/http/http_client.h b/be/src/http/http_client.h index d80f484ce80..e379895a73e 100644 --- a/be/src/http/http_client.h +++ b/be/src/http/http_client.h @@ -106,7 +106,7 @@ public: if (cl < 0) { return Status::InternalError( fmt::format("failed to get content length, it should be a positive value, " - "actrual is : {}", + "actual is : {}", cl)); } *length = (uint64_t)cl; @@ -115,6 +115,9 @@ public: return Status::InternalError("failed to get content length. err code: {}", code); } + // Get the value of the header CONTENT-MD5. The output is empty if no such header exists. + Status get_content_md5(std::string* md5) const; + long get_http_status() const { long code; curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &code); diff --git a/be/src/http/utils.cpp b/be/src/http/utils.cpp index 49f9d2c4993..b03017c12a7 100644 --- a/be/src/http/utils.cpp +++ b/be/src/http/utils.cpp @@ -22,8 +22,6 @@ #include <sys/stat.h> #include <unistd.h> -#include <algorithm> -#include <memory> #include <ostream> #include <vector> @@ -41,6 +39,7 @@ #include "io/fs/local_file_system.h" #include "olap/wal/wal_manager.h" #include "runtime/exec_env.h" +#include "util/md5.h" #include "util/path_util.h" #include "util/url_coding.h" @@ -56,7 +55,7 @@ std::string encode_basic_auth(const std::string& user, const std::string& passwd bool parse_basic_auth(const HttpRequest& req, std::string* user, std::string* passwd) { const char k_basic[] = "Basic "; - auto& auth = req.header(HttpHeaders::AUTHORIZATION); + const auto& auth = req.header(HttpHeaders::AUTHORIZATION); if (auth.compare(0, sizeof(k_basic) - 1, k_basic, sizeof(k_basic) - 1) != 0) { return false; } @@ -76,8 +75,8 @@ bool parse_basic_auth(const HttpRequest& req, std::string* user, std::string* pa } bool parse_basic_auth(const HttpRequest& req, AuthInfo* auth) { - auto& token = req.header("token"); - auto& auth_code = req.header(HTTP_AUTH_CODE); + const auto& token = req.header("token"); + const auto& auth_code = req.header(HTTP_AUTH_CODE); if (!token.empty()) { auth->token = token; } else if (!auth_code.empty()) { @@ -111,25 +110,24 @@ std::string get_content_type(const std::string& file_name) { std::string file_ext = path_util::file_extension(file_name); VLOG_TRACE << "file_name: " << file_name << "; file extension: [" << file_ext << "]"; if (file_ext == std::string(".html") || file_ext == std::string(".htm")) { - return std::string("text/html; charset=utf-8"); + return "text/html; charset=utf-8"; } else if (file_ext == std::string(".js")) { - return std::string("application/javascript; charset=utf-8"); + return "application/javascript; charset=utf-8"; } else if (file_ext == std::string(".css")) { - return std::string("text/css; charset=utf-8"); + return "text/css; charset=utf-8"; } else if (file_ext == std::string(".txt")) { - return std::string("text/plain; charset=utf-8"); + return "text/plain; charset=utf-8"; } else if (file_ext == std::string(".png")) { - return std::string("image/png"); + return "image/png"; } else if (file_ext == std::string(".ico")) { - return std::string("image/x-icon"); + return "image/x-icon"; } else { return "text/plain; charset=utf-8"; } - return ""; } void do_file_response(const std::string& file_path, HttpRequest* req, - bufferevent_rate_limit_group* rate_limit_group) { + bufferevent_rate_limit_group* rate_limit_group, bool is_acquire_md5) { if (file_path.find("..") != std::string::npos) { LOG(WARNING) << "Not allowed to read relative path: " << file_path; HttpChannel::send_error(req, HttpStatus::FORBIDDEN); @@ -163,6 +161,17 @@ void do_file_response(const std::string& file_path, HttpRequest* req, req->add_output_header(HttpHeaders::CONTENT_TYPE, get_content_type(file_path).c_str()); + if (is_acquire_md5) { + Md5Digest md5; + + void* buf = mmap(nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0); + md5.update(buf, file_size); + md5.digest(); + munmap(buf, file_size); + + req->add_output_header(HttpHeaders::CONTENT_MD5, md5.hex().c_str()); + } + if (req->method() == HttpMethod::HEAD) { close(fd); req->add_output_header(HttpHeaders::CONTENT_LENGTH, std::to_string(file_size).c_str()); @@ -194,7 +203,7 @@ void do_dir_response(const std::string& dir_path, HttpRequest* req) { } bool load_size_smaller_than_wal_limit(int64_t content_length) { - // 1. req->header(HttpHeaders::CONTENT_LENGTH) will return streamload content length. If it is empty or equels to 0, it means this streamload + // 1. req->header(HttpHeaders::CONTENT_LENGTH) will return streamload content length. If it is empty or equals to 0, it means this streamload // is a chunked streamload and we are not sure its size. // 2. if streamload content length is too large, like larger than 80% of the WAL constrain. // diff --git a/be/src/http/utils.h b/be/src/http/utils.h index 254d59cf13d..20be6c0fcd7 100644 --- a/be/src/http/utils.h +++ b/be/src/http/utils.h @@ -37,7 +37,8 @@ bool parse_basic_auth(const HttpRequest& req, std::string* user, std::string* pa bool parse_basic_auth(const HttpRequest& req, AuthInfo* auth); void do_file_response(const std::string& dir_path, HttpRequest* req, - bufferevent_rate_limit_group* rate_limit_group = nullptr); + bufferevent_rate_limit_group* rate_limit_group = nullptr, + bool is_acquire_md5 = false); void do_dir_response(const std::string& dir_path, HttpRequest* req); diff --git a/be/test/http/http_client_test.cpp b/be/test/http/http_client_test.cpp index 729a709fb93..c157f1a13c0 100644 --- a/be/test/http/http_client_test.cpp +++ b/be/test/http/http_client_test.cpp @@ -17,8 +17,11 @@ #include "http/http_client.h" +#include <fcntl.h> #include <gtest/gtest-message.h> #include <gtest/gtest-test-part.h> +#include <sys/mman.h> +#include <sys/stat.h> #include <unistd.h> #include <boost/algorithm/string/predicate.hpp> @@ -30,6 +33,7 @@ #include "http/http_headers.h" #include "http/http_request.h" #include "http/utils.h" +#include "util/md5.h" namespace doris { @@ -43,8 +47,15 @@ public: return; } req->add_output_header(HttpHeaders::CONTENT_TYPE, "text/plain; version=0.0.4"); + bool is_acquire_md5 = !req->param("acquire_md5").empty(); if (req->method() == HttpMethod::HEAD) { req->add_output_header(HttpHeaders::CONTENT_LENGTH, std::to_string(5).c_str()); + if (is_acquire_md5) { + Md5Digest md5; + md5.update("md5sum", 6); + md5.digest(); + req->add_output_header(HttpHeaders::CONTENT_MD5, md5.hex().c_str()); + } HttpChannel::send_reply(req); } else { std::string response = "test1"; @@ -80,6 +91,13 @@ public: } }; +class HttpDownloadFileHandler : public HttpHandler { +public: + void handle(HttpRequest* req) override { + do_file_response("/proc/self/exe", req, nullptr, true); + } +}; + static EvHttpServer* s_server = nullptr; static int real_port = 0; static std::string hostname = ""; @@ -87,6 +105,7 @@ static std::string hostname = ""; static HttpClientTestSimpleGetHandler s_simple_get_handler; static HttpClientTestSimplePostHandler s_simple_post_handler; static HttpNotFoundHandler s_not_found_handler; +static HttpDownloadFileHandler s_download_file_handler; class HttpClientTest : public testing::Test { public: @@ -99,6 +118,7 @@ public: s_server->register_handler(HEAD, "/simple_get", &s_simple_get_handler); s_server->register_handler(POST, "/simple_post", &s_simple_post_handler); s_server->register_handler(GET, "/not_found", &s_not_found_handler); + s_server->register_handler(HEAD, "/download_file", &s_download_file_handler); static_cast<void>(s_server->start()); real_port = s_server->get_real_port(); EXPECT_NE(0, real_port); @@ -203,4 +223,80 @@ TEST_F(HttpClientTest, not_found) { EXPECT_FALSE(status.ok()); } +TEST_F(HttpClientTest, header_content_md5) { + std::string url = hostname + "/simple_get"; + + { + // without md5 + HttpClient client; + auto st = client.init(url); + EXPECT_TRUE(st.ok()); + client.set_method(HEAD); + client.set_basic_auth("test1", ""); + st = client.execute(); + EXPECT_TRUE(st.ok()); + uint64_t len = 0; + st = client.get_content_length(&len); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(5, len); + std::string md5; + st = client.get_content_md5(&md5); + EXPECT_TRUE(st.ok()); + EXPECT_TRUE(md5.empty()); + } + + { + // with md5 + HttpClient client; + auto st = client.init(url + "?acquire_md5=true"); + EXPECT_TRUE(st.ok()); + client.set_method(HEAD); + client.set_basic_auth("test1", ""); + st = client.execute(); + EXPECT_TRUE(st.ok()); + uint64_t len = 0; + st = client.get_content_length(&len); + EXPECT_TRUE(st.ok()); + EXPECT_EQ(5, len); + std::string md5_value; + st = client.get_content_md5(&md5_value); + EXPECT_TRUE(st.ok()); + + Md5Digest md5; + md5.update("md5sum", 6); + md5.digest(); + EXPECT_EQ(md5_value, md5.hex()); + } +} + +TEST_F(HttpClientTest, download_file_md5) { + std::string url = hostname + "/download_file"; + HttpClient client; + auto st = client.init(url); + EXPECT_TRUE(st.ok()); + client.set_method(HEAD); + client.set_basic_auth("test1", ""); + st = client.execute(); + EXPECT_TRUE(st.ok()); + + std::string md5_value; + st = client.get_content_md5(&md5_value); + EXPECT_TRUE(st.ok()); + + int fd = open("/proc/self/exe", O_RDONLY); + ASSERT_TRUE(fd >= 0); + struct stat stat; + ASSERT_TRUE(fstat(fd, &stat) >= 0); + + int64_t file_size = stat.st_size; + Md5Digest md5; + void* buf = mmap(nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0); + md5.update(buf, file_size); + md5.digest(); + munmap(buf, file_size); + + EXPECT_EQ(md5_value, md5.hex()); + close(fd); +} + } // namespace doris --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org