morningman commented on a change in pull request #4383: URL: https://github.com/apache/incubator-doris/pull/4383#discussion_r476446542
########## File path: fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkEtlJobHandler.java ########## @@ -140,93 +136,96 @@ public void submitEtlJob(long loadJobId, String loadLabel, EtlJobConfig etlJobCo .setAppName(String.format(ETL_JOB_NAME, loadLabel)) .setSparkHome(sparkHome) .addAppArgs(jobConfigHdfsPath) - .redirectError() - .redirectOutput(new File(Config.sys_log_dir + "/spark-submitter.log")); + .redirectError(); + //.redirectOutput(new File(Config.sys_log_dir + "/spark-submitter.log")); Review comment: Remove unused code ########## File path: fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLauncherMonitors.java ########## @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2; + +import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; +import com.google.common.base.Strings; + +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class SparkLauncherMonitors { + private static final Logger LOG = LogManager.getLogger(SparkLauncherMonitors.class); + // 5min + private static final long SUBMIT_APP_TIMEOUT_MS = 300 * 1000; + + private LogMonitor logMonitor; + + public static LogMonitor createLogMonitor(SparkLoadAppHandle handle) { + return new LogMonitor(handle); + } + + private static SparkLoadAppHandle.State fromYarnState(YarnApplicationState yarnState) { + switch (yarnState) { + case SUBMITTED: + case ACCEPTED: + return SparkLoadAppHandle.State.SUBMITTED; + case RUNNING: + return SparkLoadAppHandle.State.RUNNING; + case FINISHED: + return SparkLoadAppHandle.State.FINISHED; + case FAILED: + return SparkLoadAppHandle.State.FAILED; + case KILLED: + return SparkLoadAppHandle.State.KILLED; + default: + // NEW NEW_SAVING + return SparkLoadAppHandle.State.UNKNOWN; + } + } + + public static class LogMonitor extends Thread { + private final Process process; + private SparkLoadAppHandle handle; + private long submitTimeoutMs; + private boolean isStop; + + private static final String STATE = "state"; + private static final String QUEUE = "queue"; + private static final String START_TIME = "start time"; + private static final String FINAL_STATUS = "final status"; + private static final String URL = "tracking URL"; + private static final String USER = "user"; + + public LogMonitor(SparkLoadAppHandle handle) { + this.handle = handle; + this.process = handle.getProcess(); + this.isStop = false; + } + + public void setSubmitTimeoutMs(long submitTimeoutMs) { + this.submitTimeoutMs = submitTimeoutMs; + } + + // Monitor the process's output + @Override + public void run() { + BufferedReader outReader = null; + String line = null; + long startTime = System.currentTimeMillis(); + try { + Preconditions.checkState(process.isAlive()); + outReader = new BufferedReader(new InputStreamReader(process.getInputStream())); + while (!isStop && (line = outReader.readLine()) != null) { + LOG.info("Monitor Log: " + line); + // parse state and appId + if (line.contains(STATE)) { + SparkLoadAppHandle.State oldState = handle.getState(); + SparkLoadAppHandle.State newState = oldState; + // 1. state + String state = regexGetState(line); + if (state != null) { + YarnApplicationState yarnState = YarnApplicationState.valueOf(state); + newState = fromYarnState(yarnState); + if (newState != oldState) { + handle.setState(newState); + } + } + // 2. appId + String appId = regexGetAppId(line); + if (appId != null) { + if (!appId.equals(handle.getAppId())) { + handle.setAppId(appId); + } + } + + LOG.info("spark appId that handle get is {}, state: {}", handle.getAppId(), handle.getState().toString()); + switch (newState) { + case UNKNOWN: + case CONNECTED: + case SUBMITTED: + // If the app stays in the UNKNOWN/CONNECTED/SUBMITTED state for more than submitTimeoutMs + // stop monitoring and kill the process + if (System.currentTimeMillis() - startTime > submitTimeoutMs) { + isStop = true; + handle.kill(); + } + break; + case RUNNING: + case FINISHED: + // There's no need to parse all logs of handle process to get all the information. + // As soon as the state changes to RUNNING/KILLED/FAILED/FINISHED/LOST, + // stop monitoring but keep the process alive. + isStop = true; + break; + case KILLED: + case FAILED: + case LOST: + // If the state changes to KILLED/FAILED/LOST, + // stop monitoring and kill the process + isStop = true; + handle.kill(); + break; + default: + Preconditions.checkState(false, "wrong spark app state"); + } + } + // parse other values + else if (line.contains(QUEUE) || line.contains(START_TIME) || line.contains(FINAL_STATUS) || Review comment: if the line contains "STATE", the while loop may be broken. So how to guarantee that this `else if` block can be ran? ########## File path: fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLauncherMonitors.java ########## @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2; + +import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; +import com.google.common.base.Strings; + +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class SparkLauncherMonitors { Review comment: Add comment to explain the function of this class ########## File path: fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLauncherMonitors.java ########## @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2; + +import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; +import com.google.common.base.Strings; + +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class SparkLauncherMonitors { + private static final Logger LOG = LogManager.getLogger(SparkLauncherMonitors.class); + // 5min + private static final long SUBMIT_APP_TIMEOUT_MS = 300 * 1000; + + private LogMonitor logMonitor; + + public static LogMonitor createLogMonitor(SparkLoadAppHandle handle) { + return new LogMonitor(handle); + } + + private static SparkLoadAppHandle.State fromYarnState(YarnApplicationState yarnState) { + switch (yarnState) { + case SUBMITTED: + case ACCEPTED: + return SparkLoadAppHandle.State.SUBMITTED; + case RUNNING: + return SparkLoadAppHandle.State.RUNNING; + case FINISHED: + return SparkLoadAppHandle.State.FINISHED; + case FAILED: + return SparkLoadAppHandle.State.FAILED; + case KILLED: + return SparkLoadAppHandle.State.KILLED; + default: + // NEW NEW_SAVING + return SparkLoadAppHandle.State.UNKNOWN; + } + } + + public static class LogMonitor extends Thread { + private final Process process; + private SparkLoadAppHandle handle; + private long submitTimeoutMs; + private boolean isStop; + + private static final String STATE = "state"; + private static final String QUEUE = "queue"; + private static final String START_TIME = "start time"; + private static final String FINAL_STATUS = "final status"; + private static final String URL = "tracking URL"; + private static final String USER = "user"; + + public LogMonitor(SparkLoadAppHandle handle) { + this.handle = handle; + this.process = handle.getProcess(); + this.isStop = false; + } + + public void setSubmitTimeoutMs(long submitTimeoutMs) { + this.submitTimeoutMs = submitTimeoutMs; + } + + // Monitor the process's output + @Override + public void run() { + BufferedReader outReader = null; + String line = null; + long startTime = System.currentTimeMillis(); + try { + Preconditions.checkState(process.isAlive()); Review comment: How to make sure the process is still alive here? ########## File path: fe/fe-core/src/test/java/org/apache/doris/load/loadv2/SparkEtlJobHandlerTest.java ########## @@ -95,94 +132,103 @@ public void setUp() { .SparkLibrary("", "", SparkRepository.SparkLibrary.LibType.SPARK2X, 0L)); } - @Test - public void testSubmitEtlJob(@Mocked BrokerUtil brokerUtil, @Mocked SparkLauncher launcher, - @Injectable SparkAppHandle handle) throws IOException, LoadException { - new Expectations() { - { - launcher.startApplication((SparkAppHandle.Listener) any); - result = handle; - handle.getAppId(); - returns(null, null, appId); - handle.getState(); - returns(State.CONNECTED, State.SUBMITTED, State.RUNNING); - } - }; - - EtlJobConfig etlJobConfig = new EtlJobConfig(Maps.newHashMap(), etlOutputPath, label, null); - SparkResource resource = new SparkResource(resourceName); - new Expectations(resource) { - { - resource.prepareArchive(); - result = archive; - } - }; +// @Test Review comment: Remove unused code ########## File path: fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLauncherMonitors.java ########## @@ -0,0 +1,216 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2; + +import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; +import com.google.common.base.Strings; + +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class SparkLauncherMonitors { + private static final Logger LOG = LogManager.getLogger(SparkLauncherMonitors.class); + // 5min + private static final long SUBMIT_APP_TIMEOUT_MS = 300 * 1000; + + private LogMonitor logMonitor; + + public static LogMonitor createLogMonitor(SparkLoadAppHandle handle) { + return new LogMonitor(handle); + } + + private static SparkLoadAppHandle.State fromYarnState(YarnApplicationState yarnState) { + switch (yarnState) { + case SUBMITTED: + case ACCEPTED: + return SparkLoadAppHandle.State.SUBMITTED; + case RUNNING: + return SparkLoadAppHandle.State.RUNNING; + case FINISHED: + return SparkLoadAppHandle.State.FINISHED; + case FAILED: + return SparkLoadAppHandle.State.FAILED; + case KILLED: + return SparkLoadAppHandle.State.KILLED; + default: + // NEW NEW_SAVING + return SparkLoadAppHandle.State.UNKNOWN; + } + } + + public static class LogMonitor extends Thread { + private final Process process; + private SparkLoadAppHandle handle; + private long submitTimeoutMs; + private boolean isStop; + + private static final String STATE = "state"; + private static final String QUEUE = "queue"; + private static final String START_TIME = "start time"; + private static final String FINAL_STATUS = "final status"; + private static final String URL = "tracking URL"; + private static final String USER = "user"; + + public LogMonitor(SparkLoadAppHandle handle) { + this.handle = handle; + this.process = handle.getProcess(); + this.isStop = false; + } + + public void setSubmitTimeoutMs(long submitTimeoutMs) { + this.submitTimeoutMs = submitTimeoutMs; + } + + // Monitor the process's output + @Override + public void run() { + BufferedReader outReader = null; + String line = null; + long startTime = System.currentTimeMillis(); + try { + Preconditions.checkState(process.isAlive()); + outReader = new BufferedReader(new InputStreamReader(process.getInputStream())); + while (!isStop && (line = outReader.readLine()) != null) { + LOG.info("Monitor Log: " + line); + // parse state and appId + if (line.contains(STATE)) { Review comment: You can add an example output line here, so that the reviewer can know what the line looks like. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org