dolfinus commented on code in PR #50126:
URL: https://github.com/apache/doris/pull/50126#discussion_r2062603957


##########
be/src/util/uuid_generator.h:
##########
@@ -17,18 +17,74 @@
 
 #pragma once
 
+#include <atomic>
 #include <boost/uuid/uuid.hpp>
 #include <boost/uuid/uuid_generators.hpp>
 #include <boost/uuid/uuid_io.hpp>
+#include <chrono>
+#include <cstdint>
 #include <mutex>
+#include <random>
 
 namespace doris {
 
+// Format:

Review Comment:
   quick-bench doesn't support including third-party libraries like Boost.
   
   I've tried to use https://github.com/google/benchmark locally, and got these 
results:
   ```cpp
   #include <atomic>
   #include <boost/random/mersenne_twister.hpp>
   #include <boost/uuid/uuid.hpp>
   #include <boost/uuid/uuid_generators.hpp>
   #include <boost/uuid/uuid_io.hpp>
   #include <chrono>
   #include <cstdint>
   #include <mutex>
   #include <random>
   #include <benchmark/benchmark.h>
   
   class BoostUUID4Generator {
   public:
       boost::uuids::uuid next_uuid() {
           std::lock_guard<std::mutex> lock(_uuid_gen_lock);
           return _boost_uuid_generator();
       }
   
       static BoostUUID4Generator* instance() {
           static BoostUUID4Generator generator;
           return &generator;
       }
   
   private:
       boost::uuids::basic_random_generator<boost::random::mt19937> 
_boost_uuid_generator;
       std::mutex _uuid_gen_lock;
   };
   
   class BoostUUID7Generator {
   public:
       boost::uuids::uuid next_uuid() {
           std::lock_guard<std::mutex> lock(_uuid_gen_lock);
           return _boost_uuid_generator();
       }
   
       static BoostUUID7Generator* instance() {
           static BoostUUID7Generator generator;
           return &generator;
       }
   
   private:
       boost::uuids::time_generator_v7 _boost_uuid_generator;
       std::mutex _uuid_gen_lock;
   };
   
   class CustomUUID7Generator {
   public:
       boost::uuids::uuid next_uuid() {
           std::lock_guard<std::mutex> lock(_uuid_gen_lock);
   
           uint64_t millis =
                   
std::chrono::time_point_cast<std::chrono::milliseconds>(std::chrono::system_clock::now()).time_since_epoch()
                           .count();
   
           uint16_t counter = _counter.fetch_add(1, std::memory_order_relaxed) 
& 0xFF;
   
           // Generate random bits
           std::uniform_int_distribution<uint64_t> dis;
           uint64_t random = dis(gen);
   
           boost::uuids::uuid uuid;
   
           uuid.data[0] = static_cast<uint8_t>((millis >> 40) & 0xFF);
           uuid.data[1] = static_cast<uint8_t>((millis >> 32) & 0xFF);
           uuid.data[2] = static_cast<uint8_t>((millis >> 24) & 0xFF);
           uuid.data[3] = static_cast<uint8_t>((millis >> 16) & 0xFF);
           uuid.data[4] = static_cast<uint8_t>((millis >> 8) & 0xFF);
           uuid.data[5] = static_cast<uint8_t>(millis & 0xFF);
   
           // Next 4 bits: version (7)
           // Next 12 bits: counter
           uuid.data[6] = static_cast<uint8_t>(0x70 | ((counter >> 8) & 0x0F));
           uuid.data[7] = static_cast<uint8_t>(counter & 0xFF);
   
           // Next 2 bits: variant (2)
           // Remaining 62 bits: random
           uuid.data[8] = static_cast<uint8_t>(0x80 | ((random >> 56) & 0x3F));
           uuid.data[9] = static_cast<uint8_t>((random >> 48) & 0xFF);
           uuid.data[10] = static_cast<uint8_t>((random >> 40) & 0xFF);
           uuid.data[11] = static_cast<uint8_t>((random >> 32) & 0xFF);
           uuid.data[12] = static_cast<uint8_t>((random >> 24) & 0xFF);
           uuid.data[13] = static_cast<uint8_t>((random >> 16) & 0xFF);
           uuid.data[14] = static_cast<uint8_t>((random >> 8) & 0xFF);
           uuid.data[15] = static_cast<uint8_t>(random & 0xFF);
   
           return uuid;
       }
   
       static CustomUUID7Generator* instance() {
           static CustomUUID7Generator generator;
           return &generator;
       }
   
   private:
       std::mutex _uuid_gen_lock;
       std::atomic<uint16_t> _counter {0};
       std::random_device rd;
       std::mt19937_64 gen { std::mt19937_64(rd()) };
   };
   
   static void BoostUUID4(benchmark::State& state) {
     // Code before the loop is not measured
     BoostUUID4Generator* generator = BoostUUID4Generator::instance();
     // Code inside this loop is measured repeatedly
     for (auto _ : state) {
       boost::uuids::uuid generated_uuid = generator->next_uuid();
       // Make sure the variable is not optimized away by compiler
       benchmark::DoNotOptimize(generated_uuid);
     }
   }
   // Register the function as a benchmark
   BENCHMARK(BoostUUID4);
   
   static void BoostUUID7(benchmark::State& state) {
     // Code before the loop is not measured
     BoostUUID7Generator* generator = BoostUUID7Generator::instance();
     // Code inside this loop is measured repeatedly
     for (auto _ : state) {
       boost::uuids::uuid generated_uuid = generator->next_uuid();
       // Make sure the variable is not optimized away by compiler
       benchmark::DoNotOptimize(generated_uuid);
     }
   }
   // Register the function as a benchmark
   BENCHMARK(BoostUUID7);
   
   static void CustomUUID7(benchmark::State& state) {
     // Code before the loop is not measured
     CustomUUID7Generator* generator = CustomUUID7Generator::instance();
     // Code inside this loop is measured repeatedly
     for (auto _ : state) {
       boost::uuids::uuid generated_uuid = generator->next_uuid();
       // Make sure the variable is not optimized away by compiler
       benchmark::DoNotOptimize(generated_uuid);
     }
   }
   BENCHMARK(CustomUUID7);
   
   BENCHMARK_MAIN();
   ```
   
   ```bash
   g++ -O3 mybenchmark.cc -std=c++11 -isystem benchmark/include 
-Lbenchmark/build/src -lbenchmark -lpthread -o mybenchmark
   ./mybenchmark
   ```
   
   ```
   2025-04-27T14:32:24+03:00
   Running ./mybenchmark
   Run on (8 X 4800 MHz CPU s)
   CPU Caches:
     L1 Data 32 KiB (x4)
     L1 Instruction 32 KiB (x4)
     L2 Unified 256 KiB (x4)
     L3 Unified 8192 KiB (x1)
   Load Average: 1.45, 1.49, 1.47
   ***WARNING*** CPU scaling is enabled, the benchmark real time measurements 
may be noisy and will incur extra overhead.
   ------------------------------------------------------
   Benchmark            Time             CPU   Iterations
   ------------------------------------------------------
   BoostUUID4        25.7 ns         25.6 ns     24760690
   BoostUUID7        60.8 ns         60.6 ns     10957843
   CustomUUID7       53.2 ns         53.1 ns     12940347
   ```
   
   This benchmark uses code after fixing 
https://github.com/apache/doris/pull/50126#pullrequestreview-2797611290 and 
https://github.com/apache/doris/pull/50126/files/4190be14312634b95ba1cbb4edcf1af8a5473314#diff-e0023bb1a4d322e4239f33381749a03e826aba045e5944e41795ad72dcbfd916.
 Without these fixes, CustomUUID7 took 10k nanoseconds, due to random generator 
initialization on every call. 
   
   BTW, Boost 1.86+ already includes generator for UUID7 which can be used 
almost as-is. And it has a code handling counter wraparound which current 
approach doesn't.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to