This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 585662b37af [improvement](http) Support to acquire md5 digest of the 
file to download (#35807)
585662b37af is described below

commit 585662b37af9c77ee0caf6bdf4bec7c81d44366e
Author: walter <w41te...@gmail.com>
AuthorDate: Tue Jun 4 10:05:00 2024 +0800

    [improvement](http) Support to acquire md5 digest of the file to download 
(#35807)
---
 be/src/http/http_client.cpp       | 46 ++++++++++++++++++-
 be/src/http/http_client.h         |  5 +-
 be/src/http/utils.cpp             | 37 +++++++++------
 be/src/http/utils.h               |  3 +-
 be/test/http/http_client_test.cpp | 96 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 169 insertions(+), 18 deletions(-)

diff --git a/be/src/http/http_client.cpp b/be/src/http/http_client.cpp
index d7a6c9c9665..b8ef9834341 100644
--- a/be/src/http/http_client.cpp
+++ b/be/src/http/http_client.cpp
@@ -24,12 +24,36 @@
 #include <ostream>
 
 #include "common/config.h"
+#include "http/http_headers.h"
 #include "http/http_status.h"
 #include "util/stack_util.h"
 
 namespace doris {
 
-HttpClient::HttpClient() {}
+// Maps a libcurl header-API result code (CURLHcode) to a short, human-readable
+// description suitable for log and error messages.
+//
+// The returned pointer refers to a string literal; callers must not free it.
+// Codes not listed below yield "unknown".
+static const char* header_error_msg(CURLHcode code) {
+    switch (code) {
+    case CURLHE_OK:
+        return "OK";
+    case CURLHE_BADINDEX:
+        // No trailing space: the text is embedded in "{}: {} ({})" messages.
+        return "header exists but not with this index";
+    case CURLHE_MISSING:
+        return "no such header exists";
+    case CURLHE_NOHEADERS:
+        return "no headers at all exist (yet)";
+    case CURLHE_NOREQUEST:
+        return "no request with this number was used";
+    case CURLHE_OUT_OF_MEMORY:
+        return "out of memory while processing";
+    case CURLHE_BAD_ARGUMENT:
+        return "a function argument was not okay";
+    case CURLHE_NOT_BUILT_IN:
+        return "curl_easy_header() was disabled in the build";
+    default:
+        return "unknown";
+    }
+}
+
+// Defaulted constructor: it performs no work of its own.
+// NOTE(review): callers in the tests invoke init(url) before issuing a request;
+// presumably the curl handle is created there -- confirm against init().
+HttpClient::HttpClient() = default;
 
 HttpClient::~HttpClient() {
     if (_curl != nullptr) {
@@ -88,7 +112,7 @@ Status HttpClient::init(const std::string& url) {
     }
 
     curl_write_callback callback = [](char* buffer, size_t size, size_t nmemb, 
void* param) {
-        HttpClient* client = (HttpClient*)param;
+        auto* client = (HttpClient*)param;
         return client->on_response_data(buffer, size * nmemb);
     };
 
@@ -177,6 +201,24 @@ Status HttpClient::execute(const std::function<bool(const 
void* data, size_t len
     return Status::OK();
 }
 
+// Reads the value of the Content-MD5 response header of the transfer that was
+// performed on this client.
+//
+// On success `*md5` holds the header value. When the response carries no such
+// header, or no headers have been received at all, `*md5` is cleared and OK is
+// still returned. Any other libcurl failure is logged with a stack trace and
+// reported as an HttpError.
+Status HttpClient::get_content_md5(std::string* md5) const {
+    struct curl_header* header_ptr = nullptr;
+    // index 0  : the first occurrence of the header.
+    // request -1: the last request of the transfer, so that after a redirect
+    //             or multi-step authentication the header is taken from the
+    //             final response instead of the first one.
+    auto code = curl_easy_header(_curl, HttpHeaders::CONTENT_MD5, 0,
+                                 CURLH_HEADER, -1, &header_ptr);
+    if (code == CURLHE_MISSING || code == CURLHE_NOHEADERS) {
+        // The header is absent: report that as an empty digest, not an error.
+        md5->clear();
+        return Status::OK();
+    } else if (code != CURLHE_OK) {
+        auto msg = fmt::format("failed to get http header {}: {} ({})",
+                               HttpHeaders::CONTENT_MD5,
+                               header_error_msg(code), code);
+        LOG(WARNING) << msg << ", trace=" << get_stack_trace();
+        return Status::HttpError(std::move(msg));
+    }
+
+    // Copy the value out: the curl_header storage is owned by libcurl.
+    *md5 = header_ptr->value;
+    return Status::OK();
+}
+
 Status HttpClient::download(const std::string& local_path) {
     // set method to GET
     set_method(GET);
diff --git a/be/src/http/http_client.h b/be/src/http/http_client.h
index d80f484ce80..e379895a73e 100644
--- a/be/src/http/http_client.h
+++ b/be/src/http/http_client.h
@@ -106,7 +106,7 @@ public:
             if (cl < 0) {
                 return Status::InternalError(
                         fmt::format("failed to get content length, it should 
be a positive value, "
-                                    "actrual is : {}",
+                                    "actual is : {}",
                                     cl));
             }
             *length = (uint64_t)cl;
@@ -115,6 +115,9 @@ public:
         return Status::InternalError("failed to get content length. err code: 
{}", code);
     }
 
+    // Get the value of the header CONTENT-MD5. The output is empty if no such 
header exists.
+    Status get_content_md5(std::string* md5) const;
+
     long get_http_status() const {
         long code;
         curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &code);
diff --git a/be/src/http/utils.cpp b/be/src/http/utils.cpp
index 49f9d2c4993..b03017c12a7 100644
--- a/be/src/http/utils.cpp
+++ b/be/src/http/utils.cpp
@@ -22,8 +22,6 @@
 #include <sys/stat.h>
 #include <unistd.h>
 
-#include <algorithm>
-#include <memory>
 #include <ostream>
 #include <vector>
 
@@ -41,6 +39,7 @@
 #include "io/fs/local_file_system.h"
 #include "olap/wal/wal_manager.h"
 #include "runtime/exec_env.h"
+#include "util/md5.h"
 #include "util/path_util.h"
 #include "util/url_coding.h"
 
@@ -56,7 +55,7 @@ std::string encode_basic_auth(const std::string& user, const 
std::string& passwd
 
 bool parse_basic_auth(const HttpRequest& req, std::string* user, std::string* 
passwd) {
     const char k_basic[] = "Basic ";
-    auto& auth = req.header(HttpHeaders::AUTHORIZATION);
+    const auto& auth = req.header(HttpHeaders::AUTHORIZATION);
     if (auth.compare(0, sizeof(k_basic) - 1, k_basic, sizeof(k_basic) - 1) != 
0) {
         return false;
     }
@@ -76,8 +75,8 @@ bool parse_basic_auth(const HttpRequest& req, std::string* 
user, std::string* pa
 }
 
 bool parse_basic_auth(const HttpRequest& req, AuthInfo* auth) {
-    auto& token = req.header("token");
-    auto& auth_code = req.header(HTTP_AUTH_CODE);
+    const auto& token = req.header("token");
+    const auto& auth_code = req.header(HTTP_AUTH_CODE);
     if (!token.empty()) {
         auth->token = token;
     } else if (!auth_code.empty()) {
@@ -111,25 +110,24 @@ std::string get_content_type(const std::string& 
file_name) {
     std::string file_ext = path_util::file_extension(file_name);
     VLOG_TRACE << "file_name: " << file_name << "; file extension: [" << 
file_ext << "]";
     if (file_ext == std::string(".html") || file_ext == std::string(".htm")) {
-        return std::string("text/html; charset=utf-8");
+        return "text/html; charset=utf-8";
     } else if (file_ext == std::string(".js")) {
-        return std::string("application/javascript; charset=utf-8");
+        return "application/javascript; charset=utf-8";
     } else if (file_ext == std::string(".css")) {
-        return std::string("text/css; charset=utf-8");
+        return "text/css; charset=utf-8";
     } else if (file_ext == std::string(".txt")) {
-        return std::string("text/plain; charset=utf-8");
+        return "text/plain; charset=utf-8";
     } else if (file_ext == std::string(".png")) {
-        return std::string("image/png");
+        return "image/png";
     } else if (file_ext == std::string(".ico")) {
-        return std::string("image/x-icon");
+        return "image/x-icon";
     } else {
         return "text/plain; charset=utf-8";
     }
-    return "";
 }
 
 void do_file_response(const std::string& file_path, HttpRequest* req,
-                      bufferevent_rate_limit_group* rate_limit_group) {
+                      bufferevent_rate_limit_group* rate_limit_group, bool 
is_acquire_md5) {
     if (file_path.find("..") != std::string::npos) {
         LOG(WARNING) << "Not allowed to read relative path: " << file_path;
         HttpChannel::send_error(req, HttpStatus::FORBIDDEN);
@@ -163,6 +161,17 @@ void do_file_response(const std::string& file_path, 
HttpRequest* req,
 
     req->add_output_header(HttpHeaders::CONTENT_TYPE, 
get_content_type(file_path).c_str());
 
+    if (is_acquire_md5) {
+        Md5Digest md5;
+
+        void* buf = mmap(nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
+        md5.update(buf, file_size);
+        md5.digest();
+        munmap(buf, file_size);
+
+        req->add_output_header(HttpHeaders::CONTENT_MD5, md5.hex().c_str());
+    }
+
     if (req->method() == HttpMethod::HEAD) {
         close(fd);
         req->add_output_header(HttpHeaders::CONTENT_LENGTH, 
std::to_string(file_size).c_str());
@@ -194,7 +203,7 @@ void do_dir_response(const std::string& dir_path, 
HttpRequest* req) {
 }
 
 bool load_size_smaller_than_wal_limit(int64_t content_length) {
-    // 1. req->header(HttpHeaders::CONTENT_LENGTH) will return streamload 
content length. If it is empty or equels to 0, it means this streamload
+    // 1. req->header(HttpHeaders::CONTENT_LENGTH) will return streamload 
content length. If it is empty or equals to 0, it means this streamload
     // is a chunked streamload and we are not sure its size.
     // 2. if streamload content length is too large, like larger than 80% of 
the WAL constrain.
     //
diff --git a/be/src/http/utils.h b/be/src/http/utils.h
index 254d59cf13d..20be6c0fcd7 100644
--- a/be/src/http/utils.h
+++ b/be/src/http/utils.h
@@ -37,7 +37,8 @@ bool parse_basic_auth(const HttpRequest& req, std::string* 
user, std::string* pa
 bool parse_basic_auth(const HttpRequest& req, AuthInfo* auth);
 
 void do_file_response(const std::string& dir_path, HttpRequest* req,
-                      bufferevent_rate_limit_group* rate_limit_group = 
nullptr);
+                      bufferevent_rate_limit_group* rate_limit_group = nullptr,
+                      bool is_acquire_md5 = false);
 
 void do_dir_response(const std::string& dir_path, HttpRequest* req);
 
diff --git a/be/test/http/http_client_test.cpp 
b/be/test/http/http_client_test.cpp
index 729a709fb93..c157f1a13c0 100644
--- a/be/test/http/http_client_test.cpp
+++ b/be/test/http/http_client_test.cpp
@@ -17,8 +17,11 @@
 
 #include "http/http_client.h"
 
+#include <fcntl.h>
 #include <gtest/gtest-message.h>
 #include <gtest/gtest-test-part.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
 #include <unistd.h>
 
 #include <boost/algorithm/string/predicate.hpp>
@@ -30,6 +33,7 @@
 #include "http/http_headers.h"
 #include "http/http_request.h"
 #include "http/utils.h"
+#include "util/md5.h"
 
 namespace doris {
 
@@ -43,8 +47,15 @@ public:
             return;
         }
         req->add_output_header(HttpHeaders::CONTENT_TYPE, "text/plain; 
version=0.0.4");
+        bool is_acquire_md5 = !req->param("acquire_md5").empty();
         if (req->method() == HttpMethod::HEAD) {
             req->add_output_header(HttpHeaders::CONTENT_LENGTH, 
std::to_string(5).c_str());
+            if (is_acquire_md5) {
+                Md5Digest md5;
+                md5.update("md5sum", 6);
+                md5.digest();
+                req->add_output_header(HttpHeaders::CONTENT_MD5, 
md5.hex().c_str());
+            }
             HttpChannel::send_reply(req);
         } else {
             std::string response = "test1";
@@ -80,6 +91,13 @@ public:
     }
 };
 
+// Test handler that answers with the running test binary (/proc/self/exe)
+// through do_file_response(), asking it to attach the MD5 digest header.
+class HttpDownloadFileHandler : public HttpHandler {
+public:
+    void handle(HttpRequest* req) override {
+        // No rate limiting is applied in the test.
+        bufferevent_rate_limit_group* no_rate_limit = nullptr;
+        const bool acquire_md5 = true;
+        do_file_response("/proc/self/exe", req, no_rate_limit, acquire_md5);
+    }
+};
+
 static EvHttpServer* s_server = nullptr;
 static int real_port = 0;
 static std::string hostname = "";
@@ -87,6 +105,7 @@ static std::string hostname = "";
 static HttpClientTestSimpleGetHandler s_simple_get_handler;
 static HttpClientTestSimplePostHandler s_simple_post_handler;
 static HttpNotFoundHandler s_not_found_handler;
+static HttpDownloadFileHandler s_download_file_handler;
 
 class HttpClientTest : public testing::Test {
 public:
@@ -99,6 +118,7 @@ public:
         s_server->register_handler(HEAD, "/simple_get", &s_simple_get_handler);
         s_server->register_handler(POST, "/simple_post", 
&s_simple_post_handler);
         s_server->register_handler(GET, "/not_found", &s_not_found_handler);
+        s_server->register_handler(HEAD, "/download_file", 
&s_download_file_handler);
         static_cast<void>(s_server->start());
         real_port = s_server->get_real_port();
         EXPECT_NE(0, real_port);
@@ -203,4 +223,80 @@ TEST_F(HttpClientTest, not_found) {
     EXPECT_FALSE(status.ok());
 }
 
+// Checks get_content_md5() against the /simple_get handler: the digest is
+// empty when the server does not send Content-MD5, and equals the MD5 of
+// "md5sum" when the request carries the acquire_md5 parameter.
+TEST_F(HttpClientTest, header_content_md5) {
+    const std::string base_url = hostname + "/simple_get";
+
+    // Sends one HEAD request to `target`, checks the common expectations
+    // (every step succeeds, content length is 5) and returns the digest
+    // reported by the client.
+    auto head_and_get_md5 = [](const std::string& target) {
+        HttpClient client;
+        auto st = client.init(target);
+        EXPECT_TRUE(st.ok());
+        client.set_method(HEAD);
+        client.set_basic_auth("test1", "");
+        st = client.execute();
+        EXPECT_TRUE(st.ok());
+        uint64_t len = 0;
+        st = client.get_content_length(&len);
+        EXPECT_TRUE(st.ok());
+        EXPECT_EQ(5, len);
+        std::string digest;
+        st = client.get_content_md5(&digest);
+        EXPECT_TRUE(st.ok());
+        return digest;
+    };
+
+    // without md5
+    const std::string absent = head_and_get_md5(base_url);
+    EXPECT_TRUE(absent.empty());
+
+    // with md5
+    const std::string md5_value = head_and_get_md5(base_url + "?acquire_md5=true");
+    Md5Digest md5;
+    md5.update("md5sum", 6);
+    md5.digest();
+    EXPECT_EQ(md5_value, md5.hex());
+}
+
+// Requests /download_file with HEAD and checks that the Content-MD5 header
+// reported by the client equals the MD5 the test computes itself over
+// /proc/self/exe, the file the handler serves.
+TEST_F(HttpClientTest, download_file_md5) {
+    std::string url = hostname + "/download_file";
+    HttpClient client;
+    auto st = client.init(url);
+    EXPECT_TRUE(st.ok());
+    client.set_method(HEAD);
+    client.set_basic_auth("test1", "");
+    st = client.execute();
+    EXPECT_TRUE(st.ok());
+
+    std::string md5_value;
+    st = client.get_content_md5(&md5_value);
+    EXPECT_TRUE(st.ok());
+
+    int fd = open("/proc/self/exe", O_RDONLY);
+    ASSERT_TRUE(fd >= 0);
+    struct stat file_stat;
+    const int stat_rc = fstat(fd, &file_stat);
+    if (stat_rc < 0) {
+        // The ASSERT below leaves the test early; do not leak the descriptor.
+        close(fd);
+    }
+    ASSERT_TRUE(stat_rc >= 0);
+
+    const int64_t file_size = file_stat.st_size;
+    void* buf = mmap(nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0);
+    // An established mapping stays valid after its descriptor is closed, so
+    // release the descriptor before any assertion can abort the test.
+    close(fd);
+    // mmap() reports failure with MAP_FAILED, not nullptr; hashing that
+    // pointer would crash instead of failing the test cleanly.
+    ASSERT_TRUE(buf != MAP_FAILED);
+
+    Md5Digest md5;
+    md5.update(buf, file_size);
+    md5.digest();
+    munmap(buf, file_size);
+
+    EXPECT_EQ(md5_value, md5.hex());
+}
+
 } // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to