nicktelford commented on code in PR #16922:
URL: https://github.com/apache/kafka/pull/16922#discussion_r1734751895


##########
streams/src/main/java/org/apache/kafka/streams/processor/internals/StateDirectory.java:
##########
@@ -182,6 +199,120 @@ private boolean lockStateDirectory() {
         return stateDirLock != null;
     }
 
+    public void initializeTasksForLocalState(final TopologyMetadata 
topologyMetadata, final StreamsMetricsImpl streamsMetrics) {
+        final List<TaskDirectory> nonEmptyTaskDirectories = 
listNonEmptyTaskDirectories();
+        if (hasPersistentStores && !nonEmptyTaskDirectories.isEmpty()) {
+            final LogContext logContext = new LogContext("main-thread ");
+            final ThreadCache dummyCache = new ThreadCache(logContext, 0, 
streamsMetrics);
+            final boolean eosEnabled = StreamsConfigUtils.eosEnabled(config);
+            final boolean stateUpdaterEnabled = 
StreamsConfig.InternalConfig.stateUpdaterEnabled(config.originals());
+
+            // discover all non-empty task directories in StateDirectory
+            for (final TaskDirectory taskDirectory : nonEmptyTaskDirectories) {
+                final String dirName = taskDirectory.file().getName();
+                final TaskId id = parseTaskDirectoryName(dirName, 
taskDirectory.namedTopology());
+                final ProcessorTopology topology = 
topologyMetadata.buildSubtopology(id);
+                final Set<TopicPartition> inputPartitions = 
topology.sourceTopics().stream().map(topic -> new TopicPartition(topic, 
id.partition())).collect(Collectors.toSet());
+
+                // create a StandbyTask for each one
+                if (topology.hasStateWithChangelogs()) {
+                    final ProcessorStateManager stateManager = new 
ProcessorStateManager(
+                        id,
+                        Task.TaskType.STANDBY,
+                        eosEnabled,
+                        logContext,
+                        this,
+                        null,
+                        topology.storeToChangelogTopic(),
+                        inputPartitions,
+                        stateUpdaterEnabled
+                    );
+
+                    final InternalProcessorContext<Object, Object> context = 
new ProcessorContextImpl(
+                        id,
+                        config,
+                        stateManager,
+                        streamsMetrics,
+                        dummyCache
+                    );
+
+                    final Task task = new StandbyTask(
+                        id,
+                        inputPartitions,
+                        topology,
+                        topologyMetadata.taskConfig(id),
+                        streamsMetrics,
+                        stateManager,
+                        this,
+                        dummyCache,
+                        context
+                    );
+
+                    // initialize and suspend new Tasks
+                    try {
+                        task.initializeIfNeeded();
+                        task.suspend();
+
+                        // add new Tasks to tasksForLocalState
+                        tasksForLocalState.put(id, task);
+                    } catch (final TaskCorruptedException e) {
+                        // Task is corrupt - wipe it out (under EOS) and don't 
initialize a Standby for it
+                        task.suspend();
+                        task.closeDirty();
+                    }
+                }
+            }
+        }
+    }
+
+    public boolean hasInitialTasks() {
+        return !tasksForLocalState.isEmpty();
+    }
+
+    public Task assignInitialTask(final TaskId taskId) {
+        final Task task = tasksForLocalState.remove(taskId);
+        if (task != null) {
+            lockedTasksToOwner.replace(taskId, Thread.currentThread());
+        }
+        return task;
+    }
+
+    public void closeInitialTasksIfLastAssginedThread() {
+        if (hasInitialTasks() && threadsWithAssignment.incrementAndGet() >= 
config.getInt(StreamsConfig.NUM_STREAM_THREADS_CONFIG)) {
+            // we need to be careful here, because other StreamThreads may 
still be assigning tasks (via assignInitialTask)
+            // so we first "drain" our Map of all remaining Tasks, and then 
close all the Tasks we successfully claimed from the Map
+            final Set<Task> tasksToClose = drainInitialTasks();
+            for (final Task task : tasksToClose) {
+                task.closeClean();
+            }
+        }
+    }
+
+    private void closeRemainingInitialTasks() {
+        closeRemainingInitialTasks(t -> true);
+    }
+
+    private void closeRemainingInitialTasks(final Predicate<Task> predicate) {
+        final Set<Task> drainedTasks = drainInitialTasks(predicate);
+        for (final Task task : drainedTasks) {
+            task.closeClean();
+        }
+    }
+
+    private Set<Task> drainInitialTasks() {
+        return drainInitialTasks(t -> true);
+    }
+
+    private Set<Task> drainInitialTasks(final Predicate<Task> predicate) {
+        final Set<Task> drainedTasks = new 
HashSet<>(tasksForLocalState.size());
+        for (final Map.Entry<TaskId, Task> entry : 
tasksForLocalState.entrySet()) {
+            if (predicate.test(entry.getValue()) && 
tasksForLocalState.remove(entry.getKey()) != null) {

Review Comment:
   While this specific method is not on the hot code path, `synchronized` 
methods lock the whole object, so any other `synchronized` method that _is_ on 
the hot code path could be affected. This is why it's generally good practice 
to avoid `synchronized` methods and instead use either concurrent data 
structures (as here) or a more fine-grained lock (from 
`java.util.concurrent.locks`) that locks only the resource that needs to be locked.
   
   I'll add a comment to explain what's going on here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to