dolfinus commented on code in PR #50126: URL: https://github.com/apache/doris/pull/50126#discussion_r2062691368
########## be/src/util/uuid_generator.h: ########## @@ -17,18 +17,92 @@ #pragma once +#include <atomic> #include <boost/uuid/uuid.hpp> #include <boost/uuid/uuid_generators.hpp> #include <boost/uuid/uuid_io.hpp> +#include <chrono> +#include <cstdint> #include <mutex> +#include <random> namespace doris { +// Format: +// We use UUID v7 (RFC 4122) for generating UUIDs. +// UUIDv7 was chosen for the following benefits: +// 1. Time-ordered - Contains a timestamp component that makes UUIDs sortable by generation time, +// which is valuable for query tracking, debugging, and performance analysis +// 2. High performance - Efficient generation with minimal overhead +// 3. Global uniqueness - Combines timestamp with random data to ensure uniqueness across +// distributed systems without coordination +// 4. Database friendly - The time-ordered nature makes it more efficient for database indexing +// and storage compared to purely random UUIDs (like v4) +// 5. Future-proof - Follows the latest UUID standard with improvements over older versions + +// Note: Our implementation differs slightly from the standard UUIDv7 specification by +// using a counter instead of random bits in the "rand_a" field to further enhance +// uniqueness when generating multiple UUIDs in rapid succession. 
+ +// 0 1 2 3 +// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | unix_ts_ms | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | unix_ts_ms | ver | counter | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// |var| rand_b | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | rand_b | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + class UUIDGenerator { public: + UUIDGenerator() { + // Initialize random generator once + std::random_device rd; + _random_gen.seed(rd()); + } + boost::uuids::uuid next_uuid() { std::lock_guard<std::mutex> lock(_uuid_gen_lock); - return _boost_uuid_generator(); + + auto now = std::chrono::system_clock::now(); + uint64_t millis = + std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()) Review Comment: According to https://en.cppreference.com/w/cpp/chrono/system_clock, the value returned by `std::chrono::system_clock::now()` is not guaranteed to be monotonic (the system time can be adjusted at any moment), so the same `lastTimestamp` trick used for Java should probably be applied here as well. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org