This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new 9b1e935d641 branch-3.1: [opt](storage) Add log and metric when aws/azure sdk do retry operation #51485 (#52025) 9b1e935d641 is described below commit 9b1e935d641df9f1c54c2fdafeb64af5b37f7e43 Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> AuthorDate: Fri Jun 20 17:15:13 2025 +0800 branch-3.1: [opt](storage) Add log and metric when aws/azure sdk do retry operation #51485 (#52025) Cherry-picked from #51485 Co-authored-by: Lei Zhang <zhang...@selectdb.com> --- be/src/common/config.cpp | 13 ++++++++- be/src/common/config.h | 7 +++++ be/src/util/s3_util.cpp | 36 ++++++++++++++++++++++- be/test/io/fs/azure_obj_storage_client_test.cpp | 18 +++++++----- cloud/src/common/config.h | 12 +++++++- cloud/src/recycler/s3_accessor.cpp | 31 ++++++++++++++++++-- common/cpp/aws_logger.h | 2 +- common/cpp/obj_retry_strategy.cpp | 38 ++++++++++++++++--------- common/cpp/obj_retry_strategy.h | 8 ++---- conf/be.conf | 9 +++++- 10 files changed, 141 insertions(+), 33 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 04e04aa4b4b..4ce28d89f0e 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -839,7 +839,18 @@ DEFINE_mInt32(zone_map_row_num_threshold, "20"); // Info = 4, // Debug = 5, // Trace = 6 -DEFINE_Int32(aws_log_level, "2"); +DEFINE_Int32(aws_log_level, "3"); +DEFINE_Validator(aws_log_level, + [](const int config) -> bool { return config >= 0 && config <= 6; }); + +// azure sdk log level +// Verbose = 1, +// Informational = 2, +// Warning = 3, +// Error = 4 +DEFINE_Int32(azure_log_level, "3"); +DEFINE_Validator(azure_log_level, + [](const int config) -> bool { return config >= 1 && config <= 4; }); // the buffer size when read data from remote storage like s3 DEFINE_mInt32(remote_storage_read_buffer_mb, "16"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 51a275a55b8..fc4377254f8 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -885,6 +885,13 @@ DECLARE_mInt32(zone_map_row_num_threshold); // Trace = 6 DECLARE_Int32(aws_log_level); +// azure sdk log level +// Verbose = 1, +// Informational = 2, +// Warning = 3, +// Error = 4 +DECLARE_Int32(azure_log_level); + // the buffer size when read data from remote storage like s3 DECLARE_mInt32(remote_storage_read_buffer_mb); diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp index ce8d9c2d6c2..f6505583337 100644 --- a/be/src/util/s3_util.cpp +++ b/be/src/util/s3_util.cpp @@ -32,6 +32,7 @@ #include <atomic> #ifdef USE_AZURE +#include <azure/core/diagnostics/logger.hpp> #include <azure/storage/blobs/blob_container_client.hpp> #endif #include <cstdlib> @@ -150,6 +151,33 @@ S3ClientFactory::S3ClientFactory() { config::s3_put_token_per_second, config::s3_put_bucket_tokens, config::s3_put_token_limit, metric_func_factory(put_rate_limit_ns, put_rate_limit_exceed_req_num))}; + +#ifdef USE_AZURE + auto azureLogLevel = + static_cast<Azure::Core::Diagnostics::Logger::Level>(config::azure_log_level); + Azure::Core::Diagnostics::Logger::SetLevel(azureLogLevel); + Azure::Core::Diagnostics::Logger::SetListener( + [&](Azure::Core::Diagnostics::Logger::Level level, const std::string& message) { + switch (level) { + case Azure::Core::Diagnostics::Logger::Level::Verbose: + LOG(INFO) << message; + break; + case Azure::Core::Diagnostics::Logger::Level::Informational: + LOG(INFO) << message; + break; + case Azure::Core::Diagnostics::Logger::Level::Warning: + LOG(WARNING) << message; + break; + case Azure::Core::Diagnostics::Logger::Level::Error: + LOG(ERROR) << message; + break; + default: + LOG(WARNING) << "Unknown level: " << static_cast<int>(level) + << ", message: " << message; + break; + } + }); +#endif } S3ClientFactory::~S3ClientFactory() { @@ -204,7 +232,13 @@ std::shared_ptr<io::ObjStorageClient> S3ClientFactory::_create_azure_client( } } - auto containerClient = std::make_shared<Azure::Storage::Blobs::BlobContainerClient>(uri, cred); + Azure::Storage::Blobs::BlobClientOptions options; + options.Retry.StatusCodes.insert(Azure::Core::Http::HttpStatusCode::TooManyRequests); + options.Retry.MaxRetries = config::max_s3_client_retry; + options.PerRetryPolicies.emplace_back(std::make_unique<AzureRetryRecordPolicy>()); + + auto containerClient = std::make_shared<Azure::Storage::Blobs::BlobContainerClient>( + uri, cred, std::move(options)); LOG_INFO("create one azure client with {}", s3_conf.to_string()); return std::make_shared<io::AzureObjStorageClient>(std::move(containerClient)); #else diff --git a/be/test/io/fs/azure_obj_storage_client_test.cpp b/be/test/io/fs/azure_obj_storage_client_test.cpp index 1297e7c75f7..4ef1770128c 100644 --- a/be/test/io/fs/azure_obj_storage_client_test.cpp +++ b/be/test/io/fs/azure_obj_storage_client_test.cpp @@ -21,6 +21,7 @@ #include "io/fs/file_system.h" #include "io/fs/obj_storage_client.h" +#include "util/s3_util.h" #ifdef USE_AZURE #include <azure/storage/blobs.hpp> @@ -49,13 +50,16 @@ protected: std::string accountKey = std::getenv("AZURE_ACCOUNT_KEY"); std::string containerName = std::getenv("AZURE_CONTAINER_NAME"); - auto cred = std::make_shared<Azure::Storage::StorageSharedKeyCredential>(accountName, - accountKey); - const std::string uri = - fmt::format("https://{}.blob.core.windows.net/{}", accountName, containerName); - auto containerClient = std::make_shared<BlobContainerClient>(uri, cred); - AzureObjStorageClientTest::obj_storage_client = - std::make_shared<io::AzureObjStorageClient>(std::move(containerClient)); + // Initialize Azure SDK + [[maybe_unused]] auto& s3ClientFactory = S3ClientFactory::instance(); + + AzureObjStorageClientTest::obj_storage_client = S3ClientFactory::instance().create( + {.endpoint = fmt::format("https://{}.blob.core.windows.net", accountName), + .region = "dummy-region", + .ak = accountName, + .sk = accountKey, + .bucket = containerName, + .provider = io::ObjStorageType::AZURE}); } void SetUp() override { diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index 857b4292c35..84d889a059e 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -285,7 +285,17 @@ CONF_Strings(recycler_storage_vault_white_list, ""); // Info = 4, // Debug = 5, // Trace = 6 -CONF_Int32(aws_log_level, "2"); +CONF_Int32(aws_log_level, "3"); +CONF_Validator(aws_log_level, [](const int config) -> bool { return config >= 0 && config <= 6; }); + +// azure sdk log level +// Verbose = 1, +// Informational = 2, +// Warning = 3, +// Error = 4 +CONF_Int32(azure_log_level, "3"); +CONF_Validator(azure_log_level, + [](const int config) -> bool { return config >= 1 && config <= 4; }); // ca_cert_file is in this path by default, Normally no modification is required // ca cert default path is different from different OS diff --git a/cloud/src/recycler/s3_accessor.cpp b/cloud/src/recycler/s3_accessor.cpp index ada9a4a1e8c..3c36f5a01bb 100644 --- a/cloud/src/recycler/s3_accessor.cpp +++ b/cloud/src/recycler/s3_accessor.cpp @@ -29,6 +29,7 @@ #include <algorithm> #ifdef USE_AZURE +#include <azure/core/diagnostics/logger.hpp> #include <azure/storage/blobs/blob_container_client.hpp> #include <azure/storage/common/storage_credential.hpp> #endif @@ -120,6 +121,33 @@ public: return std::make_shared<DorisAWSLogger>(logLevel); }; Aws::InitAPI(aws_options_); + +#ifdef USE_AZURE + auto azureLogLevel = + static_cast<Azure::Core::Diagnostics::Logger::Level>(config::azure_log_level); + Azure::Core::Diagnostics::Logger::SetLevel(azureLogLevel); + Azure::Core::Diagnostics::Logger::SetListener( + [&](Azure::Core::Diagnostics::Logger::Level level, const std::string& message) { + switch (level) { + case Azure::Core::Diagnostics::Logger::Level::Verbose: + LOG(INFO) << message; + break; + case Azure::Core::Diagnostics::Logger::Level::Informational: + LOG(INFO) << message; + break; + case Azure::Core::Diagnostics::Logger::Level::Warning: + LOG(WARNING) << message; + break; + case Azure::Core::Diagnostics::Logger::Level::Error: + LOG(ERROR) << message; + break; + default: + LOG(WARNING) << "Unknown level: " << static_cast<int>(level) + << ", message: " << message; + break; + } + }); +#endif } ~S3Environment() { Aws::ShutdownAPI(aws_options_); } @@ -308,8 +336,7 @@ int S3Accessor::init() { // Within the RetryPolicy, the nextPolicy is called multiple times inside a loop. // All policies in the PerRetryPolicies are downstream of the RetryPolicy. // Therefore, you only need to add a policy to check if the response code is 429 and if the retry count meets the condition, it can record the retry count. - options.PerRetryPolicies.emplace_back( - std::make_unique<AzureRetryRecordPolicy>(config::max_s3_client_retry)); + options.PerRetryPolicies.emplace_back(std::make_unique<AzureRetryRecordPolicy>()); auto container_client = std::make_shared<Azure::Storage::Blobs::BlobContainerClient>( uri_, cred, std::move(options)); // uri format for debug: ${scheme}://${ak}.blob.core.windows.net/${bucket}/${prefix} diff --git a/common/cpp/aws_logger.h b/common/cpp/aws_logger.h index ca607cab056..85734d13e14 100644 --- a/common/cpp/aws_logger.h +++ b/common/cpp/aws_logger.h @@ -19,7 +19,7 @@ #include <aws/core/utils/logging/LogLevel.h> #include <aws/core/utils/logging/LogSystemInterface.h> -#include <glog/logging.h> // IWYU pragma: export +#include <glog/logging.h> class DorisAWSLogger final : public Aws::Utils::Logging::LogSystemInterface { public: diff --git a/common/cpp/obj_retry_strategy.cpp b/common/cpp/obj_retry_strategy.cpp index 0226d1af28f..6da4c23980a 100644 --- a/common/cpp/obj_retry_strategy.cpp +++ b/common/cpp/obj_retry_strategy.cpp @@ -19,10 +19,11 @@ #include <aws/core/http/HttpResponse.h> #include <bvar/reducer.h> +#include <glog/logging.h> namespace doris { -bvar::Adder<int64_t> s3_too_many_request_retry_cnt("s3_too_many_request_retry_cnt"); +bvar::Adder<int64_t> object_request_retry_count("object_request_retry_count"); S3CustomRetryStrategy::S3CustomRetryStrategy(int maxRetries) : DefaultRetryStrategy(maxRetries) {} @@ -35,33 +36,42 @@ bool S3CustomRetryStrategy::ShouldRetry(const Aws::Client::AWSError<Aws::Client: } if (Aws::Http::IsRetryableHttpResponseCode(error.GetResponseCode()) || error.ShouldRetry()) { - s3_too_many_request_retry_cnt << 1; + object_request_retry_count << 1; + LOG(INFO) << "retry due to error: " << error << ", attempt: " << attemptedRetries + 1 << "/" + << m_maxRetries; return true; } return false; } #ifdef USE_AZURE -AzureRetryRecordPolicy::AzureRetryRecordPolicy(int retry_cnt) : retry_cnt(retry_cnt) {} - -AzureRetryRecordPolicy::~AzureRetryRecordPolicy() = default; std::unique_ptr<Azure::Core::Http::RawResponse> AzureRetryRecordPolicy::Send( Azure::Core::Http::Request& request, Azure::Core::Http::Policies::NextHttpPolicy nextPolicy, Azure::Core::Context const& context) const { - auto resp = nextPolicy.Send(request, context); - if (retry_cnt != 0 && - resp->GetStatusCode() == Azure::Core::Http::HttpStatusCode::TooManyRequests) { - retry_cnt--; - s3_too_many_request_retry_cnt << 1; + // https://learn.microsoft.com/en-us/azure/developer/cpp/sdk/fundamentals/http-pipelines-and-retries + + std::unique_ptr<Azure::Core::Http::RawResponse> response = nextPolicy.Send(request, context); + int32_t retry_count = + Azure::Core::Http::Policies::_internal::RetryPolicy::GetRetryCount(context); + + if (static_cast<int>(response->GetStatusCode()) > 299 || + static_cast<int>(response->GetStatusCode()) < 200) { + if (retry_count > 0) { + object_request_retry_count << 1; + } + + // If the response is not successful, we log the retry attempt and status code. + LOG(INFO) << "azure retry retry_count: " << retry_count + << ", status code: " << static_cast<int>(response->GetStatusCode()) + << ", reason: " << response->GetReasonPhrase(); } - return resp; + + return response; } std::unique_ptr<AzureRetryRecordPolicy::HttpPolicy> AzureRetryRecordPolicy::Clone() const { - auto ret = std::make_unique<AzureRetryRecordPolicy>(*this); - ret->retry_cnt = 0; - return ret; + return std::make_unique<AzureRetryRecordPolicy>(*this); } #endif } // namespace doris \ No newline at end of file diff --git a/common/cpp/obj_retry_strategy.h b/common/cpp/obj_retry_strategy.h index b081ca91a22..dd98f871716 100644 --- a/common/cpp/obj_retry_strategy.h +++ b/common/cpp/obj_retry_strategy.h @@ -37,16 +37,14 @@ public: #ifdef USE_AZURE class AzureRetryRecordPolicy final : public Azure::Core::Http::Policies::HttpPolicy { public: - AzureRetryRecordPolicy(int retry_cnt); - ~AzureRetryRecordPolicy() override; + AzureRetryRecordPolicy() = default; + ~AzureRetryRecordPolicy() override = default; + std::unique_ptr<HttpPolicy> Clone() const override; std::unique_ptr<Azure::Core::Http::RawResponse> Send( Azure::Core::Http::Request& request, Azure::Core::Http::Policies::NextHttpPolicy nextPolicy, Azure::Core::Context const& context) const override; - -private: - mutable int retry_cnt; }; #endif } // namespace doris \ No newline at end of file diff --git a/conf/be.conf b/conf/be.conf index 961b41b51fd..28207c16962 100644 --- a/conf/be.conf +++ b/conf/be.conf @@ -88,6 +88,13 @@ sys_log_level = INFO # Debug = 5, # Trace = 6 # Default to turn off aws sdk log, because aws sdk errors that need to be cared will be output through Doris logs -aws_log_level=2 +aws_log_level = 3 + +# azure sdk log level +# Verbose = 1, +# Informational = 2, +# Warning = 3, +# Error = 4 +azure_log_level = 3 ## If you are not running in aws cloud, you can disable EC2 metadata AWS_EC2_METADATA_DISABLED=true --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org