ruanwenjun commented on code in PR #17005:
URL:
https://github.com/apache/dolphinscheduler/pull/17005#discussion_r1957241984
##########
dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/AbstractCommandExecutor.java:
##########
@@ -216,13 +216,30 @@ public void cancelApplication() throws
InterruptedException {
return;
}
- // soft kill
- log.info("Begin to kill process process, pid is : {}",
taskRequest.getProcessId());
- process.destroy();
- if (!process.waitFor(5, TimeUnit.SECONDS)) {
+ try {
+ // Try to kill process tree first
+ boolean killed = ProcessUtils.kill(taskRequest);
+ if (killed) {
+ log.info("Successfully killed process tree for task: {}, pid:
{}",
+ taskRequest.getTaskAppId(),
taskRequest.getProcessId());
+ return;
+ }
+
+ // If killing process tree fails, try to destroy the process
directly
+ log.info("Failed to kill process tree, trying to destroy process
directly");
+ process.destroy();
+ if (!process.waitFor(5, TimeUnit.SECONDS)) {
+ log.info("Process did not exit after destroy, forcing
termination");
+ process.destroyForcibly();
+ }
+ log.info("Successfully killed process tree for task: {}, pid: {}",
+ taskRequest.getTaskAppId(), taskRequest.getProcessId());
+
+ } catch (Exception e) {
+ log.error("Error while killing process, pid: {}",
taskRequest.getProcessId(), e);
+ // Try destroyForcibly as last resort
process.destroyForcibly();
Review Comment:
This is not needed.
##########
dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtils.java:
##########
@@ -85,31 +85,105 @@ private ProcessUtils() {
private static final Pattern LINUXPATTERN =
Pattern.compile("\\((\\d+)\\)");
/**
- * kill tasks according to different task types.
+ * Terminate the task process, support multi-level signal processing and
fallback strategy
+ * @param request Task execution context
+ * @return Whether the process was successfully terminated
*/
- @Deprecated
public static boolean kill(@NonNull TaskExecutionContext request) {
try {
- log.info("Begin kill task instance, processId: {}",
request.getProcessId());
+ log.info("Begin killing task instance, processId: {}",
request.getProcessId());
int processId = request.getProcessId();
if (processId == 0) {
- log.error("Task instance kill failed, processId is not exist");
+ log.error("Task instance kill failed, processId is not
available");
return false;
}
- String cmd = String.format("kill -9 %s", getPidsStr(processId));
- cmd = OSUtils.getSudoCmd(request.getTenantCode(), cmd);
- log.info("process id:{}, cmd:{}", processId, cmd);
+ // Get all child processes
+ String pids = getPidsStr(processId);
+ String[] pidArray = pids.split("\\s+");
+ if (pidArray.length == 0) {
+ log.warn("No valid PIDs found for process: {}", processId);
+ return true;
+ }
+
+ // 1. Try to terminate gracefully (SIGINT)
+ boolean gracefulKillSuccess = sendKillSignal("SIGINT", pids,
request.getTenantCode(), 2000);
+ if (gracefulKillSuccess) {
+ log.info("Successfully killed process tree using SIGINT,
processId: {}", processId);
+ return true;
+ }
+
+ // 2. Try to terminate forcefully (SIGTERM)
+ boolean termKillSuccess = sendKillSignal("SIGTERM", pids,
request.getTenantCode(), 2000);
+ if (termKillSuccess) {
+ log.info("Successfully killed process tree using SIGTERM,
processId: {}", processId);
+ return true;
+ }
+
+ // 3. As a last resort, use `kill -9`
+ log.warn("SIGINT & SIGTERM failed, using SIGKILL as a last resort
for processId: {}", processId);
+ boolean forceKillSuccess = sendKillSignal("SIGKILL", pids,
request.getTenantCode(), 3000);
+ if (forceKillSuccess) {
+ log.info("Successfully killed process tree using SIGKILL,
processId: {}", processId);
+ return true;
+ }
- OSUtils.exeCmd(cmd);
- log.info("Success kill task instance, processId: {}",
request.getProcessId());
+ // 4. Finally, check if the process is still alive
+ for (String pid : pidArray) {
+ if (isProcessAlive(pid)) {
+ log.error("Failed to kill process {}, even after multiple
attempts", pid);
+ return false;
+ }
+ }
+
+ log.info("Success: process {} is no longer running", processId);
Review Comment:
Is this needed to do a check here, if the process has set graceful exist,
this check will false, but this doesn't mean we kill failed.
##########
dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtils.java:
##########
@@ -85,31 +85,105 @@ private ProcessUtils() {
private static final Pattern LINUXPATTERN =
Pattern.compile("\\((\\d+)\\)");
/**
- * kill tasks according to different task types.
+ * Terminate the task process, support multi-level signal processing and
fallback strategy
+ * @param request Task execution context
+ * @return Whether the process was successfully terminated
*/
- @Deprecated
public static boolean kill(@NonNull TaskExecutionContext request) {
try {
- log.info("Begin kill task instance, processId: {}",
request.getProcessId());
+ log.info("Begin killing task instance, processId: {}",
request.getProcessId());
int processId = request.getProcessId();
if (processId == 0) {
- log.error("Task instance kill failed, processId is not exist");
+ log.error("Task instance kill failed, processId is not
available");
Review Comment:
We should return true here, the logic is equals with `pidArray` is empty.
##########
dolphinscheduler-task-plugin/dolphinscheduler-task-api/src/main/java/org/apache/dolphinscheduler/plugin/task/api/utils/ProcessUtils.java:
##########
@@ -85,31 +85,105 @@ private ProcessUtils() {
private static final Pattern LINUXPATTERN =
Pattern.compile("\\((\\d+)\\)");
/**
- * kill tasks according to different task types.
+ * Terminate the task process, support multi-level signal processing and
fallback strategy
+ * @param request Task execution context
+ * @return Whether the process was successfully terminated
*/
- @Deprecated
public static boolean kill(@NonNull TaskExecutionContext request) {
try {
- log.info("Begin kill task instance, processId: {}",
request.getProcessId());
+ log.info("Begin killing task instance, processId: {}",
request.getProcessId());
int processId = request.getProcessId();
if (processId == 0) {
- log.error("Task instance kill failed, processId is not exist");
+ log.error("Task instance kill failed, processId is not
available");
return false;
}
- String cmd = String.format("kill -9 %s", getPidsStr(processId));
- cmd = OSUtils.getSudoCmd(request.getTenantCode(), cmd);
- log.info("process id:{}, cmd:{}", processId, cmd);
+ // Get all child processes
+ String pids = getPidsStr(processId);
+ String[] pidArray = pids.split("\\s+");
+ if (pidArray.length == 0) {
+ log.warn("No valid PIDs found for process: {}", processId);
+ return true;
+ }
+
+ // 1. Try to terminate gracefully (SIGINT)
+ boolean gracefulKillSuccess = sendKillSignal("SIGINT", pids,
request.getTenantCode(), 2000);
Review Comment:
Don't add wait time, this will make RPC failed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]