Copilot commented on code in PR #7077: URL: https://github.com/apache/hbase/pull/7077#discussion_r2134711105
########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java: ########## @@ -599,32 +593,47 @@ private void removeGlobalQueue(String globalId) { } private void tryCleanupGlobalQueue(String globalId, Procedure<?> procedure) { + tryCleanupQueue(globalId, procedure, () -> globalMap, GLOBAL_QUEUE_KEY_COMPARATOR, + locking::getGlobalLock, globalRunQueue, this::removeGlobalQueue); + } + + private static boolean isGlobalProcedure(Procedure<?> proc) { + return proc instanceof GlobalProcedureInterface; + } + + private static String getGlobalId(Procedure<?> proc) { + return ((GlobalProcedureInterface) proc).getGlobalId(); + } + + private <T extends Comparable<T>, TNode extends Queue<T>> boolean tryCleanupQueue(T id, + Procedure<?> proc, Supplier<TNode> getMap, AvlKeyComparator<TNode> comparator, + Function<T, LockAndQueue> getLock, FairQueue<T> runQueue, Consumer<T> removeQueue) { schedLock(); try { - GlobalQueue queue = AvlTree.get(globalMap, globalId, GLOBAL_QUEUE_KEY_COMPARATOR); + Queue<T> queue = AvlTree.get(getMap.get(), id, comparator); if (queue == null) { - return; + return true; } - final LockAndQueue lock = locking.getGlobalLock(globalId); - if (queue.isEmpty() && lock.tryExclusiveLock(procedure)) { - removeFromRunQueue(globalRunQueue, queue, - () -> "clean up global queue after " + procedure + " completed"); - removeGlobalQueue(globalId); + LockAndQueue lock = getLock.apply(id); + if (queue.isEmpty() && lock.isWaitingQueueEmpty() && !lock.isLocked()) { + // 1. the queue is empty + // 2. no procedure is in the lock's waiting queue + // 3. no other one holds the lock. It is possible that someone else holds the lock, usually + // our parent procedures + // If we can meet all the above conditions, it is safe for us to remove the queue + removeFromRunQueue(runQueue, queue, () -> "clean up queue after " + proc + " completed"); + removeQueue.accept(id); + return true; Review Comment: Checking `isWaitingQueueEmpty()` and `!isLocked()` without acquiring the lock is not atomic—another procedure could acquire the lock between these calls. To avoid races, consider using `lock.tryExclusiveLock(...)` as before or otherwise atomically claiming the lock before cleanup. ```suggestion if (queue.isEmpty() && lock.tryExclusiveLock(proc)) { try { // 1. the queue is empty // 2. we successfully acquired the lock exclusively // If we can meet all the above conditions, it is safe for us to remove the queue removeFromRunQueue(runQueue, queue, () -> "clean up queue after " + proc + " completed"); removeQueue.accept(id); return true; } finally { lock.releaseExclusiveLock(); } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org