This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 9b1e935d641 branch-3.1: [opt](storage) Add log and metric when 
aws/azure sdk do retry operation #51485 (#52025)
9b1e935d641 is described below

commit 9b1e935d641df9f1c54c2fdafeb64af5b37f7e43
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Jun 20 17:15:13 2025 +0800

    branch-3.1: [opt](storage) Add log and metric when aws/azure sdk do retry 
operation #51485 (#52025)
    
    Cherry-picked from #51485
    
    Co-authored-by: Lei Zhang <zhang...@selectdb.com>
---
 be/src/common/config.cpp                        | 13 ++++++++-
 be/src/common/config.h                          |  7 +++++
 be/src/util/s3_util.cpp                         | 36 ++++++++++++++++++++++-
 be/test/io/fs/azure_obj_storage_client_test.cpp | 18 +++++++-----
 cloud/src/common/config.h                       | 12 +++++++-
 cloud/src/recycler/s3_accessor.cpp              | 31 ++++++++++++++++++--
 common/cpp/aws_logger.h                         |  2 +-
 common/cpp/obj_retry_strategy.cpp               | 38 ++++++++++++++++---------
 common/cpp/obj_retry_strategy.h                 |  8 ++----
 conf/be.conf                                    |  9 +++++-
 10 files changed, 141 insertions(+), 33 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 04e04aa4b4b..4ce28d89f0e 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -839,7 +839,18 @@ DEFINE_mInt32(zone_map_row_num_threshold, "20");
 //    Info = 4,
 //    Debug = 5,
 //    Trace = 6
-DEFINE_Int32(aws_log_level, "2");
+DEFINE_Int32(aws_log_level, "3");
+DEFINE_Validator(aws_log_level,
+                 [](const int config) -> bool { return config >= 0 && config 
<= 6; });
+
+// azure sdk log level
+//    Verbose = 1,
+//    Informational = 2,
+//    Warning = 3,
+//    Error = 4
+DEFINE_Int32(azure_log_level, "3");
+DEFINE_Validator(azure_log_level,
+                 [](const int config) -> bool { return config >= 1 && config 
<= 4; });
 
 // the buffer size when read data from remote storage like s3
 DEFINE_mInt32(remote_storage_read_buffer_mb, "16");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 51a275a55b8..fc4377254f8 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -885,6 +885,13 @@ DECLARE_mInt32(zone_map_row_num_threshold);
 //    Trace = 6
 DECLARE_Int32(aws_log_level);
 
+// azure sdk log level
+//    Verbose = 1,
+//    Informational = 2,
+//    Warning = 3,
+//    Error = 4
+DECLARE_Int32(azure_log_level);
+
 // the buffer size when read data from remote storage like s3
 DECLARE_mInt32(remote_storage_read_buffer_mb);
 
diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index ce8d9c2d6c2..f6505583337 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -32,6 +32,7 @@
 
 #include <atomic>
 #ifdef USE_AZURE
+#include <azure/core/diagnostics/logger.hpp>
 #include <azure/storage/blobs/blob_container_client.hpp>
 #endif
 #include <cstdlib>
@@ -150,6 +151,33 @@ S3ClientFactory::S3ClientFactory() {
                     config::s3_put_token_per_second, 
config::s3_put_bucket_tokens,
                     config::s3_put_token_limit,
                     metric_func_factory(put_rate_limit_ns, 
put_rate_limit_exceed_req_num))};
+
+#ifdef USE_AZURE
+    auto azureLogLevel =
+            
static_cast<Azure::Core::Diagnostics::Logger::Level>(config::azure_log_level);
+    Azure::Core::Diagnostics::Logger::SetLevel(azureLogLevel);
+    Azure::Core::Diagnostics::Logger::SetListener(
+            [&](Azure::Core::Diagnostics::Logger::Level level, const 
std::string& message) {
+                switch (level) {
+                case Azure::Core::Diagnostics::Logger::Level::Verbose:
+                    LOG(INFO) << message;
+                    break;
+                case Azure::Core::Diagnostics::Logger::Level::Informational:
+                    LOG(INFO) << message;
+                    break;
+                case Azure::Core::Diagnostics::Logger::Level::Warning:
+                    LOG(WARNING) << message;
+                    break;
+                case Azure::Core::Diagnostics::Logger::Level::Error:
+                    LOG(ERROR) << message;
+                    break;
+                default:
+                    LOG(WARNING) << "Unknown level: " << 
static_cast<int>(level)
+                                 << ", message: " << message;
+                    break;
+                }
+            });
+#endif
 }
 
 S3ClientFactory::~S3ClientFactory() {
@@ -204,7 +232,13 @@ std::shared_ptr<io::ObjStorageClient> 
S3ClientFactory::_create_azure_client(
         }
     }
 
-    auto containerClient = 
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(uri, cred);
+    Azure::Storage::Blobs::BlobClientOptions options;
+    
options.Retry.StatusCodes.insert(Azure::Core::Http::HttpStatusCode::TooManyRequests);
+    options.Retry.MaxRetries = config::max_s3_client_retry;
+    
options.PerRetryPolicies.emplace_back(std::make_unique<AzureRetryRecordPolicy>());
+
+    auto containerClient = 
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(
+            uri, cred, std::move(options));
     LOG_INFO("create one azure client with {}", s3_conf.to_string());
     return 
std::make_shared<io::AzureObjStorageClient>(std::move(containerClient));
 #else
diff --git a/be/test/io/fs/azure_obj_storage_client_test.cpp 
b/be/test/io/fs/azure_obj_storage_client_test.cpp
index 1297e7c75f7..4ef1770128c 100644
--- a/be/test/io/fs/azure_obj_storage_client_test.cpp
+++ b/be/test/io/fs/azure_obj_storage_client_test.cpp
@@ -21,6 +21,7 @@
 
 #include "io/fs/file_system.h"
 #include "io/fs/obj_storage_client.h"
+#include "util/s3_util.h"
 
 #ifdef USE_AZURE
 #include <azure/storage/blobs.hpp>
@@ -49,13 +50,16 @@ protected:
         std::string accountKey = std::getenv("AZURE_ACCOUNT_KEY");
         std::string containerName = std::getenv("AZURE_CONTAINER_NAME");
 
-        auto cred = 
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(accountName,
-                                                                               
  accountKey);
-        const std::string uri =
-                fmt::format("https://{}.blob.core.windows.net/{}";, 
accountName, containerName);
-        auto containerClient = std::make_shared<BlobContainerClient>(uri, 
cred);
-        AzureObjStorageClientTest::obj_storage_client =
-                
std::make_shared<io::AzureObjStorageClient>(std::move(containerClient));
+        // Initialize Azure SDK
+        [[maybe_unused]] auto& s3ClientFactory = S3ClientFactory::instance();
+
+        AzureObjStorageClientTest::obj_storage_client = 
S3ClientFactory::instance().create(
+                {.endpoint = fmt::format("https://{}.blob.core.windows.net";, 
accountName),
+                 .region = "dummy-region",
+                 .ak = accountName,
+                 .sk = accountKey,
+                 .bucket = containerName,
+                 .provider = io::ObjStorageType::AZURE});
     }
 
     void SetUp() override {
diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index 857b4292c35..84d889a059e 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -285,7 +285,17 @@ CONF_Strings(recycler_storage_vault_white_list, "");
 //    Info = 4,
 //    Debug = 5,
 //    Trace = 6
-CONF_Int32(aws_log_level, "2");
+CONF_Int32(aws_log_level, "3");
+CONF_Validator(aws_log_level, [](const int config) -> bool { return config >= 
0 && config <= 6; });
+
+// azure sdk log level
+//    Verbose = 1,
+//    Informational = 2,
+//    Warning = 3,
+//    Error = 4
+CONF_Int32(azure_log_level, "3");
+CONF_Validator(azure_log_level,
+               [](const int config) -> bool { return config >= 1 && config <= 
4; });
 
 // ca_cert_file is in this path by default, Normally no modification is 
required
 // ca cert default path is different from different OS
diff --git a/cloud/src/recycler/s3_accessor.cpp 
b/cloud/src/recycler/s3_accessor.cpp
index ada9a4a1e8c..3c36f5a01bb 100644
--- a/cloud/src/recycler/s3_accessor.cpp
+++ b/cloud/src/recycler/s3_accessor.cpp
@@ -29,6 +29,7 @@
 
 #include <algorithm>
 #ifdef USE_AZURE
+#include <azure/core/diagnostics/logger.hpp>
 #include <azure/storage/blobs/blob_container_client.hpp>
 #include <azure/storage/common/storage_credential.hpp>
 #endif
@@ -120,6 +121,33 @@ public:
             return std::make_shared<DorisAWSLogger>(logLevel);
         };
         Aws::InitAPI(aws_options_);
+
+#ifdef USE_AZURE
+        auto azureLogLevel =
+                
static_cast<Azure::Core::Diagnostics::Logger::Level>(config::azure_log_level);
+        Azure::Core::Diagnostics::Logger::SetLevel(azureLogLevel);
+        Azure::Core::Diagnostics::Logger::SetListener(
+                [&](Azure::Core::Diagnostics::Logger::Level level, const 
std::string& message) {
+                    switch (level) {
+                    case Azure::Core::Diagnostics::Logger::Level::Verbose:
+                        LOG(INFO) << message;
+                        break;
+                    case 
Azure::Core::Diagnostics::Logger::Level::Informational:
+                        LOG(INFO) << message;
+                        break;
+                    case Azure::Core::Diagnostics::Logger::Level::Warning:
+                        LOG(WARNING) << message;
+                        break;
+                    case Azure::Core::Diagnostics::Logger::Level::Error:
+                        LOG(ERROR) << message;
+                        break;
+                    default:
+                        LOG(WARNING) << "Unknown level: " << 
static_cast<int>(level)
+                                     << ", message: " << message;
+                        break;
+                    }
+                });
+#endif
     }
 
     ~S3Environment() { Aws::ShutdownAPI(aws_options_); }
@@ -308,8 +336,7 @@ int S3Accessor::init() {
         // Within the RetryPolicy, the nextPolicy is called multiple times 
inside a loop.
         // All policies in the PerRetryPolicies are downstream of the 
RetryPolicy.
         // Therefore, you only need to add a policy to check if the response 
code is 429 and if the retry count meets the condition, it can record the retry 
count.
-        options.PerRetryPolicies.emplace_back(
-                
std::make_unique<AzureRetryRecordPolicy>(config::max_s3_client_retry));
+        
options.PerRetryPolicies.emplace_back(std::make_unique<AzureRetryRecordPolicy>());
         auto container_client = 
std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(
                 uri_, cred, std::move(options));
         // uri format for debug: 
${scheme}://${ak}.blob.core.windows.net/${bucket}/${prefix}
diff --git a/common/cpp/aws_logger.h b/common/cpp/aws_logger.h
index ca607cab056..85734d13e14 100644
--- a/common/cpp/aws_logger.h
+++ b/common/cpp/aws_logger.h
@@ -19,7 +19,7 @@
 
 #include <aws/core/utils/logging/LogLevel.h>
 #include <aws/core/utils/logging/LogSystemInterface.h>
-#include <glog/logging.h> // IWYU pragma: export
+#include <glog/logging.h>
 
 class DorisAWSLogger final : public Aws::Utils::Logging::LogSystemInterface {
 public:
diff --git a/common/cpp/obj_retry_strategy.cpp 
b/common/cpp/obj_retry_strategy.cpp
index 0226d1af28f..6da4c23980a 100644
--- a/common/cpp/obj_retry_strategy.cpp
+++ b/common/cpp/obj_retry_strategy.cpp
@@ -19,10 +19,11 @@
 
 #include <aws/core/http/HttpResponse.h>
 #include <bvar/reducer.h>
+#include <glog/logging.h>
 
 namespace doris {
 
-bvar::Adder<int64_t> 
s3_too_many_request_retry_cnt("s3_too_many_request_retry_cnt");
+bvar::Adder<int64_t> object_request_retry_count("object_request_retry_count");
 
 S3CustomRetryStrategy::S3CustomRetryStrategy(int maxRetries) : 
DefaultRetryStrategy(maxRetries) {}
 
@@ -35,33 +36,42 @@ bool S3CustomRetryStrategy::ShouldRetry(const 
Aws::Client::AWSError<Aws::Client:
     }
 
     if (Aws::Http::IsRetryableHttpResponseCode(error.GetResponseCode()) || 
error.ShouldRetry()) {
-        s3_too_many_request_retry_cnt << 1;
+        object_request_retry_count << 1;
+        LOG(INFO) << "retry due to error: " << error << ", attempt: " << 
attemptedRetries + 1 << "/"
+                  << m_maxRetries;
         return true;
     }
 
     return false;
 }
 #ifdef USE_AZURE
-AzureRetryRecordPolicy::AzureRetryRecordPolicy(int retry_cnt) : 
retry_cnt(retry_cnt) {}
-
-AzureRetryRecordPolicy::~AzureRetryRecordPolicy() = default;
 
 std::unique_ptr<Azure::Core::Http::RawResponse> AzureRetryRecordPolicy::Send(
         Azure::Core::Http::Request& request, 
Azure::Core::Http::Policies::NextHttpPolicy nextPolicy,
         Azure::Core::Context const& context) const {
-    auto resp = nextPolicy.Send(request, context);
-    if (retry_cnt != 0 &&
-        resp->GetStatusCode() == 
Azure::Core::Http::HttpStatusCode::TooManyRequests) {
-        retry_cnt--;
-        s3_too_many_request_retry_cnt << 1;
+    // 
https://learn.microsoft.com/en-us/azure/developer/cpp/sdk/fundamentals/http-pipelines-and-retries
+
+    std::unique_ptr<Azure::Core::Http::RawResponse> response = 
nextPolicy.Send(request, context);
+    int32_t retry_count =
+            
Azure::Core::Http::Policies::_internal::RetryPolicy::GetRetryCount(context);
+
+    if (static_cast<int>(response->GetStatusCode()) > 299 ||
+        static_cast<int>(response->GetStatusCode()) < 200) {
+        if (retry_count > 0) {
+            object_request_retry_count << 1;
+        }
+
+        // If the response is not successful, we log the retry attempt and 
status code.
+        LOG(INFO) << "azure retry retry_count: " << retry_count
+                  << ", status code: " << 
static_cast<int>(response->GetStatusCode())
+                  << ", reason: " << response->GetReasonPhrase();
     }
-    return resp;
+
+    return response;
 }
 
 std::unique_ptr<AzureRetryRecordPolicy::HttpPolicy> 
AzureRetryRecordPolicy::Clone() const {
-    auto ret = std::make_unique<AzureRetryRecordPolicy>(*this);
-    ret->retry_cnt = 0;
-    return ret;
+    return std::make_unique<AzureRetryRecordPolicy>(*this);
 }
 #endif
 } // namespace doris
\ No newline at end of file
diff --git a/common/cpp/obj_retry_strategy.h b/common/cpp/obj_retry_strategy.h
index b081ca91a22..dd98f871716 100644
--- a/common/cpp/obj_retry_strategy.h
+++ b/common/cpp/obj_retry_strategy.h
@@ -37,16 +37,14 @@ public:
 #ifdef USE_AZURE
 class AzureRetryRecordPolicy final : public 
Azure::Core::Http::Policies::HttpPolicy {
 public:
-    AzureRetryRecordPolicy(int retry_cnt);
-    ~AzureRetryRecordPolicy() override;
+    AzureRetryRecordPolicy() = default;
+    ~AzureRetryRecordPolicy() override = default;
+
     std::unique_ptr<HttpPolicy> Clone() const override;
     std::unique_ptr<Azure::Core::Http::RawResponse> Send(
             Azure::Core::Http::Request& request,
             Azure::Core::Http::Policies::NextHttpPolicy nextPolicy,
             Azure::Core::Context const& context) const override;
-
-private:
-    mutable int retry_cnt;
 };
 #endif
 } // namespace doris
\ No newline at end of file
diff --git a/conf/be.conf b/conf/be.conf
index 961b41b51fd..28207c16962 100644
--- a/conf/be.conf
+++ b/conf/be.conf
@@ -88,6 +88,13 @@ sys_log_level = INFO
 #    Debug = 5,
 #    Trace = 6
 # Default to turn off aws sdk log, because aws sdk errors that need to be 
cared will be output through Doris logs
-aws_log_level=2
+aws_log_level = 3
+
+# azure sdk log level
+#    Verbose = 1,
+#    Informational = 2,
+#    Warning = 3,
+#    Error = 4
+azure_log_level = 3
 ## If you are not running in aws cloud, you can disable EC2 metadata
 AWS_EC2_METADATA_DISABLED=true


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to