xy720 commented on a change in pull request #4383: URL: https://github.com/apache/incubator-doris/pull/4383#discussion_r477165212
########## File path: fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLauncherMonitors.java ########## @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2; + +import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; +import com.google.common.base.Strings; + +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class SparkLauncherMonitors { + private static final Logger LOG = LogManager.getLogger(SparkLauncherMonitors.class); + // 5min + private static final long SUBMIT_APP_TIMEOUT_MS = 300 * 1000; + + private LogMonitor logMonitor; + + public static LogMonitor createLogMonitor(SparkLoadAppHandle handle) { + return new LogMonitor(handle); + } + + private static SparkLoadAppHandle.State fromYarnState(YarnApplicationState yarnState) { + switch (yarnState) { + case SUBMITTED: + case ACCEPTED: + return SparkLoadAppHandle.State.SUBMITTED; + case RUNNING: + return SparkLoadAppHandle.State.RUNNING; + case FINISHED: + return SparkLoadAppHandle.State.FINISHED; + case FAILED: + return SparkLoadAppHandle.State.FAILED; + case KILLED: + return SparkLoadAppHandle.State.KILLED; + default: + // NEW NEW_SAVING + return SparkLoadAppHandle.State.UNKNOWN; + } + } + + public static class LogMonitor extends Thread { + private final Process process; + private SparkLoadAppHandle handle; + private long submitTimeoutMs; + private boolean isStop; + + private static final String STATE = "state"; + private static final String QUEUE = "queue"; + private static final String START_TIME = "start time"; + private static final String FINAL_STATUS = "final status"; + private static final String URL = "tracking URL"; + private static final String USER = "user"; + + public LogMonitor(SparkLoadAppHandle handle) { + this.handle = handle; + this.process = handle.getProcess(); + this.isStop = false; + } + + public void setSubmitTimeoutMs(long submitTimeoutMs) { + this.submitTimeoutMs = submitTimeoutMs; + } + + // Monitor the process's output + @Override + public void run() { + BufferedReader outReader = null; + String line = null; + long startTime = System.currentTimeMillis(); + try { + Preconditions.checkState(process.isAlive()); + outReader = new BufferedReader(new InputStreamReader(process.getInputStream())); + while (!isStop && (line = outReader.readLine()) != null) { + LOG.info("Monitor Log: " + line); + // parse state and appId + if (line.contains(STATE)) { + SparkLoadAppHandle.State oldState = handle.getState(); + SparkLoadAppHandle.State newState = oldState; + // 1. state + String state = regexGetState(line); + if (state != null) { + YarnApplicationState yarnState = YarnApplicationState.valueOf(state); + newState = fromYarnState(yarnState); + if (newState != oldState) { + handle.setState(newState); + } + } + // 2. appId + String appId = regexGetAppId(line); + if (appId != null) { + if (!appId.equals(handle.getAppId())) { + handle.setAppId(appId); + } + } + + LOG.info("spark appId that handle get is {}, state: {}", handle.getAppId(), handle.getState().toString()); + switch (newState) { + case UNKNOWN: + case CONNECTED: + case SUBMITTED: + // If the app stays in the UNKNOWN/CONNECTED/SUBMITTED state for more than submitTimeoutMs + // stop monitoring and kill the process + if (System.currentTimeMillis() - startTime > submitTimeoutMs) { + isStop = true; + handle.kill(); + } + break; + case RUNNING: + case FINISHED: + // There's no need to parse all logs of handle process to get all the information. + // As soon as the state changes to RUNNING/KILLED/FAILED/FINISHED/LOST, + // stop monitoring but keep the process alive. + isStop = true; + break; + case KILLED: + case FAILED: + case LOST: + // If the state changes to KILLED/FAILED/LOST, + // stop monitoring and kill the process + isStop = true; + handle.kill(); + break; + default: + Preconditions.checkState(false, "wrong spark app state"); + } + } + // parse other values + else if (line.contains(QUEUE) || line.contains(START_TIME) || line.contains(FINAL_STATUS) || Review comment: The state's changing follows the rule of 1. `submited > running > finished / failed` 2. `submitted > killed` 3. `submitted > running > killed` Normally, the laucher will print the `queue`、`start time`、`final status`、`tracking url`、`user` logs in state submitted/runnning. So in case 2, the else if block may still not be ran when the while loop be broken. But it's not very terrible that this else if block not be ran, beacuse the necessary value we need only contains `appId` and `state`. In this case, `queue`、`start time`、`final status`、`tracking url`、`user` is just missing. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org