dolfinus commented on code in PR #50126: URL: https://github.com/apache/doris/pull/50126#discussion_r2097373744
########## fe/fe-core/src/main/java/org/apache/doris/common/UUIDv7Generator.java: ########## @@ -0,0 +1,132 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common; + +// Format +// We use UUID v7 (RFC 4122) for generating UUIDs. +// UUIDv7 was chosen for the following benefits: +// 1. Time-ordered - Contains a timestamp component that makes UUIDs sortable by generation time, +// which is valuable for query tracking, debugging, and performance analysis +// 2. High performance - Efficient generation with minimal overhead +// 3. Global uniqueness - Combines timestamp with random data to ensure uniqueness across +// distributed systems without coordination +// 4. Database friendly - The time-ordered nature makes it more efficient for database indexing +// and storage compared to purely random UUIDs (like v4) +// 5. Future-proof - Follows the latest UUID standard with improvements over older versions + +// Note: Our implementation differs slightly from the standard UUIDv7 specification by +// using a counter instead of random bits in the "rand_a" field to further enhance +// uniqueness when generating multiple UUIDs in rapid succession. + +// 0 1 2 3 +// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | unix_ts_ms | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | unix_ts_ms | ver | counter | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// |var| rand_b | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | rand_b | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + +import java.util.Random; +import java.util.UUID; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.LockSupport; + +public class UUIDv7Generator { + private static final UUIDv7Generator INSTANCE = new UUIDv7Generator(); + private static final Random RANDOM = ThreadLocalRandom.current(); + private static final AtomicInteger COUNTER = new AtomicInteger(0); + private static final long VERSION = 7L << 12; + private static final long VARIANT = 2L << 62; + private static final long MAX_SEQUENCE = 0xFFF; + private static final int SPIN_THRESHOLD = 10; + private static final long PARK_NANOS = 100L; // 100 nanoseconds + + private final AtomicLong timestampAndSequence; + + private UUIDv7Generator() { + // Initialize with current timestamp and 0 sequence + long initialTimestamp = System.currentTimeMillis(); + timestampAndSequence = new AtomicLong((initialTimestamp << 12)); + } + + public static UUIDv7Generator getInstance() { + return INSTANCE; + } + + public UUID nextUUID() { + int spinCount = 0; + while (true) { + long currentTimestamp = System.currentTimeMillis(); + long current = timestampAndSequence.get(); + long timestamp = current >>> 12; + long sequence = current & MAX_SEQUENCE; + + // Implementation difference from C++ version: + // In the Java implementation, we use a more complex approach to handle timestamp synchronization: + // 1. We check if the current timestamp is less than the last timestamp (clock drift/adjustment case) + // 2. Or if we've exhausted the sequence counter for the current timestamp + // In these cases, we spin-wait (or park) to avoid generating duplicate timestamps + // This approach prioritizes strict ordering and uniqueness over throughput in edge cases + // The C++ version handles this by just reusing the last timestamp, which is simpler but + // might allow multiple UUIDs with the same timestamp and different counters + if (currentTimestamp < timestamp || (currentTimestamp == timestamp && sequence >= MAX_SEQUENCE)) { + if (spinCount < SPIN_THRESHOLD) { + spinCount++; + continue; + } + LockSupport.parkNanos(PARK_NANOS); + continue; + } + + long next; + if (currentTimestamp > timestamp) { + next = currentTimestamp << 12; + } else { + next = current + 1; + } + + if (timestampAndSequence.compareAndSet(current, next)) { + return generateUUID(next >>> 12, next & MAX_SEQUENCE); + } + spinCount++; + } + } + + private UUID generateUUID(long timestamp, long sequence) { + // Get counter value (12 bits) + int counter = (int) (sequence & 0xFFF); Review Comment: ```suggestion int counter = (int) (sequence & MAX_SEQUENCE); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org