dolfinus commented on code in PR #50126: URL: https://github.com/apache/doris/pull/50126#discussion_r2062603957
########## be/src/util/uuid_generator.h: ########## @@ -17,18 +17,74 @@ #pragma once +#include <atomic> #include <boost/uuid/uuid.hpp> #include <boost/uuid/uuid_generators.hpp> #include <boost/uuid/uuid_io.hpp> +#include <chrono> +#include <cstdint> #include <mutex> +#include <random> namespace doris { +// Format: Review Comment: quick-bench doesn't support including third-party libraries like Boost. I've tried to use https://github.com/google/benchmark locally, and got these results: ```cpp #include <atomic> #include <boost/random/mersenne_twister.hpp> #include <boost/uuid/uuid.hpp> #include <boost/uuid/uuid_generators.hpp> #include <boost/uuid/uuid_io.hpp> #include <chrono> #include <cstdint> #include <mutex> #include <random> #include <benchmark/benchmark.h> class BoostUUID4Generator { public: boost::uuids::uuid next_uuid() { std::lock_guard<std::mutex> lock(_uuid_gen_lock); return _boost_uuid_generator(); } static BoostUUID4Generator* instance() { static BoostUUID4Generator generator; return &generator; } private: boost::uuids::basic_random_generator<boost::random::mt19937> _boost_uuid_generator; std::mutex _uuid_gen_lock; }; class BoostUUID7Generator { public: boost::uuids::uuid next_uuid() { std::lock_guard<std::mutex> lock(_uuid_gen_lock); return _boost_uuid_generator(); } static BoostUUID7Generator* instance() { static BoostUUID7Generator generator; return &generator; } private: boost::uuids::time_generator_v7 _boost_uuid_generator; std::mutex _uuid_gen_lock; }; class CustomUUID7Generator { public: boost::uuids::uuid next_uuid() { std::lock_guard<std::mutex> lock(_uuid_gen_lock); uint64_t millis = std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::system_clock::now()).time_since_epoch() .count(); uint16_t counter = _counter.fetch_add(1, std::memory_order_relaxed) & 0xFF; // Generate random bits std::uniform_int_distribution<uint64_t> dis; uint64_t random = dis(gen); boost::uuids::uuid uuid; uuid.data[0] = static_cast<uint8_t>((millis >> 40) & 0xFF); uuid.data[1] = static_cast<uint8_t>((millis >> 32) & 0xFF); uuid.data[2] = static_cast<uint8_t>((millis >> 24) & 0xFF); uuid.data[3] = static_cast<uint8_t>((millis >> 16) & 0xFF); uuid.data[4] = static_cast<uint8_t>((millis >> 8) & 0xFF); uuid.data[5] = static_cast<uint8_t>(millis & 0xFF); // Next 4 bits: version (7) // Next 12 bits: counter uuid.data[6] = static_cast<uint8_t>(0x70 | ((counter >> 8) & 0x0F)); uuid.data[7] = static_cast<uint8_t>(counter & 0xFF); // Next 2 bits: variant (2) // Remaining 62 bits: random uuid.data[8] = static_cast<uint8_t>(0x80 | ((random >> 56) & 0x3F)); uuid.data[9] = static_cast<uint8_t>((random >> 48) & 0xFF); uuid.data[10] = static_cast<uint8_t>((random >> 40) & 0xFF); uuid.data[11] = static_cast<uint8_t>((random >> 32) & 0xFF); uuid.data[12] = static_cast<uint8_t>((random >> 24) & 0xFF); uuid.data[13] = static_cast<uint8_t>((random >> 16) & 0xFF); uuid.data[14] = static_cast<uint8_t>((random >> 8) & 0xFF); uuid.data[15] = static_cast<uint8_t>(random & 0xFF); return uuid; } static CustomUUID7Generator* instance() { static CustomUUID7Generator generator; return &generator; } private: std::mutex _uuid_gen_lock; std::atomic<uint16_t> _counter {0}; std::random_device rd; std::mt19937_64 gen { std::mt19937_64(rd()) }; }; static void BoostUUID4(benchmark::State& state) { // Code before the loop is not measured BoostUUID4Generator* generator = BoostUUID4Generator::instance(); // Code inside this loop is measured repeatedly for (auto _ : state) { boost::uuids::uuid generated_uuid = generator->next_uuid(); // Make sure the variable is not optimized away by compiler benchmark::DoNotOptimize(generated_uuid); } } // Register the function as a benchmark BENCHMARK(BoostUUID4); static void BoostUUID7(benchmark::State& state) { // Code before the loop is not measured BoostUUID7Generator* generator = BoostUUID7Generator::instance(); // Code inside this loop is measured repeatedly for (auto _ : state) { boost::uuids::uuid generated_uuid = generator->next_uuid(); // Make sure the variable is not optimized away by compiler benchmark::DoNotOptimize(generated_uuid); } } // Register the function as a benchmark BENCHMARK(BoostUUID7); static void CustomUUID7(benchmark::State& state) { // Code before the loop is not measured CustomUUID7Generator* generator = CustomUUID7Generator::instance(); // Code inside this loop is measured repeatedly for (auto _ : state) { boost::uuids::uuid generated_uuid = generator->next_uuid(); // Make sure the variable is not optimized away by compiler benchmark::DoNotOptimize(generated_uuid); } } BENCHMARK(CustomUUID7); BENCHMARK_MAIN(); ``` ```bash g++ -O3 mybenchmark.cc -std=c++11 -isystem benchmark/include -Lbenchmark/build/src -lbenchmark -lpthread -o mybenchmark ./mybenchmark ``` ``` 2025-04-27T14:32:24+03:00 Running ./mybenchmark Run on (8 X 4800 MHz CPU s) CPU Caches: L1 Data 32 KiB (x4) L1 Instruction 32 KiB (x4) L2 Unified 256 KiB (x4) L3 Unified 8192 KiB (x1) Load Average: 1.45, 1.49, 1.47 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. ------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------ BoostUUID4 25.7 ns 25.6 ns 24760690 BoostUUID7 60.8 ns 60.6 ns 10957843 CustomUUID7 53.2 ns 53.1 ns 12940347 ``` This benchmark uses code after fixing https://github.com/apache/doris/pull/50126#pullrequestreview-2797611290 and https://github.com/apache/doris/pull/50126/files/4190be14312634b95ba1cbb4edcf1af8a5473314#diff-e0023bb1a4d322e4239f33381749a03e826aba045e5944e41795ad72dcbfd916. Without these fixes, CustomUUID7 took 10k nanoseconds, due to random generator initialization on every call. BTW, Boost 1.86+ already includes generator for UUID7 which can be used almost as-is. And it has a code handling counter wraparound which current approach doesn't. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org