This is an automated email from the ASF dual-hosted git repository. huajianlan pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new f1193f47b91 [enhancement](test) add flight record script/test to analyze memory consume (#48472) f1193f47b91 is described below commit f1193f47b915ac8fe4e87b4d58233c31e1945fdd Author: 924060929 <lanhuaj...@selectdb.com> AuthorDate: Mon Mar 3 22:41:44 2025 +0800 [enhancement](test) add flight record script/test to analyze memory consume (#48472) 1. add `flight_record_fe.sh` to listen the memory consume/gc events and generate jfr file 2. add regression test action `flightRecord` to analyze the record flight events for nereids (only support for jdk17+), if you need flightRecord test, you should add `nonConcurrent` group for this test --- LICENSE.txt | 8 + bin/{profile_fe.sh => flight_record_fe.sh} | 41 ++- bin/profile_fe.sh | 2 +- regression-test/framework/pom.xml | 6 +- .../regression/action/FlightRecordAction.groovy | 288 +++++++++++++++++++++ .../org/apache/doris/regression/suite/Suite.groovy | 5 + .../apache/doris/regression/util/JdbcUtils.groovy | 12 + .../framework/src/main/groovy/suite.gdsl | 1 + .../suites/demo_p0/test_flight_record.groovy | 62 +++++ 9 files changed, 410 insertions(+), 15 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index 6f9b2963bba..7ee02c57977 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -734,3 +734,11 @@ under the Common Development and Distribution License (CDDL). Exceptions are noted within the associated source files. ---------------------------------------------------------------------------------- + +org.openjdk.jmc.flightrecorder: UPL 1.0 or BSD-style license + +The Mission Control source code is made available under the Universal Permissive License (UPL), +Version 1.0 or a BSD-style license, alternatively. The full open source license text is available +at license/LICENSE.txt in the JMC project. + +---------------------------------------------------------------------------------- \ No newline at end of file diff --git a/bin/profile_fe.sh b/bin/flight_record_fe.sh similarity index 61% copy from bin/profile_fe.sh copy to bin/flight_record_fe.sh index 1481cce4069..e9a72af18fe 100755 --- a/bin/profile_fe.sh +++ b/bin/flight_record_fe.sh @@ -26,17 +26,17 @@ export DORIS_HOME echo "DORIS_HOME: ${DORIS_HOME}" if [[ -z "${JAVA_HOME}" ]]; then - if ! command -v java &>/dev/null; then - JAVA="" + if ! command -v jcmd &>/dev/null; then + JCMD="" else - JAVA="$(command -v java)" + JCMD="$(command -v jcmd)" fi else - JAVA="${JAVA_HOME}/bin/java" + JCMD="${JAVA_HOME}/bin/jcmd" fi -echo "JAVA: ${JAVA}" +echo "JCMD: ${JCMD}" -if [[ ! -x "${JAVA}" ]]; then +if [[ ! -x "${JCMD}" ]]; then echo "The JAVA_HOME environment variable is not set correctly" echo "This environment variable is required to run this program" echo "Note: JAVA_HOME should point to a JDK and not a JRE" @@ -44,7 +44,7 @@ if [[ ! -x "${JAVA}" ]]; then exit 1 fi -FE_PID=$(jps | grep DorisFE | awk '{print $1}') +FE_PID=$(${JAVA_HOME}/bin/jps | grep DorisFE | awk '{print $1}') if [[ -z "${FE_PID}" ]]; then echo "DorisFe not started" exit 1 @@ -53,11 +53,26 @@ echo "DorisFE pid: ${FE_PID}" mkdir -p "${DORIS_HOME}/log" NOW=$(date +'%Y%m%d%H%M%S') -PROFILE_OUTPUT="${DORIS_HOME}/log/profile_${NOW}.html" -if [[ -z "${PROFILE_SECONDS}" ]]; then - PROFILE_SECONDS="10" +RECORD_OUTPUT="${DORIS_HOME}/log/flight_record_${NOW}.jfr" +if [[ -z "${RECORD_SECONDS}" ]]; then + RECORD_SECONDS="30" fi -echo "Begin profiling ${PROFILE_SECONDS} seconds and generate flame graph to ${PROFILE_OUTPUT}..." -${JAVA} -jar "${DORIS_HOME}"/lib/ap-loader-all-*.jar profiler -a -n -l -i 200us -d "${PROFILE_SECONDS}" -f "${PROFILE_OUTPUT}" "${FE_PID}" -echo "Generated flame graph to ${PROFILE_OUTPUT}" +# add shutdown hook to stop flight record +cleanup() { + echo "Exec shutdown hook" + ${JCMD} "${FE_PID}" JFR.stop name="jfr_${NOW}" + echo "Generated flight record to ${RECORD_OUTPUT}" + exit 1 +} + +trap cleanup SIGINT + +echo "Begin flight record ${RECORD_SECONDS} seconds and generate to ${RECORD_OUTPUT}..." +${JCMD} "${FE_PID}" JFR.start name="jfr_${NOW}" settings=profile filename="${RECORD_OUTPUT}" + +echo "wait ${RECORD_SECONDS} seconds..." +sleep "${RECORD_SECONDS}" +${JCMD} "${FE_PID}" JFR.stop name="jfr_${NOW}" + +echo "Generated flight record to ${RECORD_OUTPUT}" diff --git a/bin/profile_fe.sh b/bin/profile_fe.sh index 1481cce4069..d8a15f31fd9 100755 --- a/bin/profile_fe.sh +++ b/bin/profile_fe.sh @@ -44,7 +44,7 @@ if [[ ! -x "${JAVA}" ]]; then exit 1 fi -FE_PID=$(jps | grep DorisFE | awk '{print $1}') +FE_PID=$(${JAVA_HOME}/bin/jps | grep DorisFE | awk '{print $1}') if [[ -z "${FE_PID}" ]]; then echo "DorisFe not started" exit 1 diff --git a/regression-test/framework/pom.xml b/regression-test/framework/pom.xml index 6fb22e66d83..b860d95cc8c 100644 --- a/regression-test/framework/pom.xml +++ b/regression-test/framework/pom.xml @@ -414,6 +414,10 @@ under the License. <artifactId>aliyun-sdk-oss</artifactId> <version>3.18.1</version> </dependency> - + <dependency> + <groupId>org.openjdk.jmc</groupId> + <artifactId>flightrecorder</artifactId> + <version>9.0.0</version> + </dependency> </dependencies> </project> diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/action/FlightRecordAction.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/action/FlightRecordAction.groovy new file mode 100644 index 00000000000..b1ac7086afe --- /dev/null +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/action/FlightRecordAction.groovy @@ -0,0 +1,288 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.regression.action + +import ch.qos.logback.core.rolling.RollingFileAppender +import groovy.transform.stc.ClosureParams +import groovy.transform.stc.FromString +import groovy.util.logging.Slf4j +import org.apache.doris.regression.suite.Suite +import org.apache.doris.regression.suite.SuiteContext +import org.openjdk.jmc.common.IMCFrame +import org.openjdk.jmc.common.IMCMethod +import org.openjdk.jmc.common.IMCStackTrace +import org.openjdk.jmc.common.item.IItem +import org.openjdk.jmc.common.item.IItemCollection +import org.openjdk.jmc.common.item.IItemIterable +import org.openjdk.jmc.common.item.ItemFilters +import org.openjdk.jmc.common.item.ItemToolkit +import org.openjdk.jmc.flightrecorder.JfrAttributes +import org.openjdk.jmc.flightrecorder.JfrLoaderToolkit +import org.openjdk.jmc.flightrecorder.jdk.JdkAttributes +import org.openjdk.jmc.flightrecorder.jdk.JdkTypeIDs +import org.slf4j.LoggerFactory + +@Slf4j +class FlightRecordAction implements SuiteAction { + private SuiteContext context + private boolean cleanUp = true + private Closure record + private Closure callback + + private static File logPath + private String processName = "DorisFE" + private List<String> extraConfig = [] + + static { + ch.qos.logback.classic.Logger loggerOfSuite = + LoggerFactory.getLogger(Suite.class) as ch.qos.logback.classic.Logger + def context = loggerOfSuite.getLoggerContext() + logPath = new File("log") + for (final def logger in context.getLoggerList()) { + def it = logger.iteratorForAppenders() + while (it.hasNext()) { + def appender = it.next() + if (appender instanceof RollingFileAppender) { + logPath = new File(new File(appender.getFile()).parentFile.absolutePath) + log.info("Log path: ${logPath.getPath()}") + break + } + } + } + } + + FlightRecordAction(SuiteContext context) { + this.context = context + } + + private void cleanUp(boolean cleanUp) { + this.cleanUp = cleanUp + } + + private void processName(String processName) { + this.processName = processName + } + + private void extraConfig(List<String> extraConfig) { + this.extraConfig = extraConfig + } + + void record(Closure record) { + this.record = record + } + + void callback(@ClosureParams(value = FromString, options = [ + "org.openjdk.jmc.common.item.IItemCollection", + "org.openjdk.jmc.common.item.IItemCollection,Throwable,Long,Long"]) Closure<Boolean> callback) { + this.callback = callback + } + + static long getAllocationBytes(IItemCollection collection, boolean filterNereids) { + IItemCollection allocSample = collection.apply(ItemFilters.type(JdkTypeIDs.OBJ_ALLOC_SAMPLE)) + + long allocationBytes = 0 + for (IItemIterable iItems : allocSample) { + for (IItem iItem : iItems) { + if (filterNereids) { + IMCStackTrace stackTrace = ItemToolkit.getItemType(iItem) + .getAccessor(JfrAttributes.EVENT_STACKTRACE.getKey()) + .getMember(iItem) + if (stackTrace == null) { + continue + } + + boolean isNereids = false + for (IMCFrame frame : stackTrace.getFrames()) { + IMCMethod method = frame.getMethod() + String methodName = method.getMethodName() + String fullName = method.getType().getFullName() + if (fullName.equals("org.apache.doris.qe.StmtExecutor") && methodName.equals("executeByNereids")) { + isNereids = true + break + } + } + if (!isNereids) { + continue + } + } + + long weightBytes = ItemToolkit.getItemType(iItem).getAccessor(JdkAttributes.SAMPLE_WEIGHT.getKey()) + .getMember(iItem).longValue() + allocationBytes += weightBytes + } + } + return allocationBytes + } + + @Override + void run() { + Optional<String> recordName = Optional.empty() + try { + def startTime = System.currentTimeMillis() + if (record == null || callback == null) { + callbackError(startTime, new IllegalStateException("record or callback is null")) + return + } + + def fePid = getFePid(startTime) + if (!fePid.isPresent()) { + return + } + + recordName = startRecord(startTime, fePid.get()) + if (!recordName.isPresent()) { + return + } + + try { + this.record.call() + } catch (Throwable t) { + callbackError(startTime, t) + return + } + + def stopResult = stopRecord(startTime, fePid.get(), recordName.get()) + if (!stopResult.isPresent()) { + return + } + + IItemCollection records = null + try { + records = JfrLoaderToolkit.loadEvents(getRecordFile(recordName.get())) + } catch (Throwable t) { + callbackError(startTime, new IllegalStateException("Parse records falied: " + t.toString(), t)) + return + } + + if (callback.parameterTypes.size() == 1) { + callback(records) + } else { + callback(records, null, startTime, System.currentTimeMillis()) + } + } finally { + if (cleanUp && recordName.isPresent()) { + def file = getRecordFile(recordName.get()) + log.info("cleanup: ${file.absolutePath}") + try { + file.delete() + } catch (Throwable t) { + // ignore + } + } + } + } + + private Optional<String> stopRecord(long startTime, int pid, String name) { + log.info("stop record") + def stopResult = exec(["jcmd", "${pid}", "JFR.stop", "name=${name}"].execute()) + if (stopResult.exception != null || stopResult.exitCode != 0) { + Throwable e = new IllegalStateException("Can not stop record flight: " + new String(stopResult.stderr), stopResult.exception) + callbackError(startTime, e) + return Optional.empty() + } + log.info(new String(stopResult.stdout)) + return Optional.of(new String(stopResult.stdout)) + } + + private Optional<String> startRecord(long startTime, int pid) { + String name = "flight_record_${startTime}".toString() + String file = getRecordFile(name).absolutePath + log.info("start record: ${file}") + List<String> commands = ["jcmd", "${pid}", "JFR.start", "name=${name}", "filename=${file}"] + if (extraConfig != null) { + commands.addAll(extraConfig) + } + def startResult = exec(commands.execute()) + if (startResult.exception != null || startResult.exitCode != 0) { + Throwable e = new IllegalStateException("Can not start record flight: " + new String(startResult.stderr), startResult.exception) + callbackError(startTime, e) + return Optional.empty() + } + log.info(new String(startResult.stdout)) + return Optional.of(name) + } + + private File getRecordFile(String name) { + return new File(logPath, name + ".jfr") + } + + private Optional<Integer> getFePid(long startTime) { + def fePidResult = exec(["jps"].execute() + .pipeTo(["grep", processName].execute()) + .pipeTo(["awk", "{print \$1}"].execute()) + .pipeTo(["head", "-1"].execute())) + if (fePidResult.exception != null || fePidResult.exitCode != 0) { + Throwable e = new IllegalStateException("Can not get frontend pid: " + new String(fePidResult.stderr), fePidResult.exception) + callbackError(startTime, e) + return Optional.empty() + } + + def pidStr = new String(fePidResult.stdout) + if (pidStr.isEmpty()) { + throw new IllegalStateException("Can not found process: ${processName}") + } + int fePid = pidStr.toInteger() + log.info("fe pid: ${fePid}") + return Optional.of(fePid) + } + + private void callbackError(long startTime, Throwable exception) { + if (callback.parameterTypes.size() == 1) { + throw exception + } + callback(null, exception, startTime, System.currentTimeMillis()) + } + + private ProcessResult exec(Process process) { + def exitCode = 0 + try { + exitCode = process.waitFor() + return new ProcessResult( + process, + exitCode, + process.inputStream.bytes, + process.errorStream.bytes, + null + ) + } catch (Throwable t) { + return new ProcessResult( + process, + exitCode, + null, + process.errorStream.bytes, + t + ) + } + } + + private static class ProcessResult { + Process process + int exitCode + byte[] stdout + byte[] stderr + Throwable exception + + ProcessResult(Process process, int exitCode, byte[] stdout, byte[] stderr, Throwable exception) { + this.process = process + this.exitCode = exitCode + this.stdout = stdout + this.stderr = stderr + this.exception = exception + } + } +} diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy index f567cd57d8f..831554361fe 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy @@ -41,6 +41,7 @@ import org.awaitility.Awaitility import org.apache.commons.lang3.ObjectUtils import org.apache.doris.regression.Config import org.apache.doris.regression.RegressionTest +import org.apache.doris.regression.action.FlightRecordAction import org.apache.doris.regression.action.BenchmarkAction import org.apache.doris.regression.action.ProfileAction import org.apache.doris.regression.action.WaitForAction @@ -757,6 +758,10 @@ class Suite implements GroovyInterceptable { } } + void flightRecord(Closure actionSupplier) { + runAction(new FlightRecordAction(context), actionSupplier) + } + void profile(String tag, Closure<String> actionSupplier) { runAction(new ProfileAction(context, tag), actionSupplier) } diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/util/JdbcUtils.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/util/JdbcUtils.groovy index 821b80c3365..670f6ad4e91 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/util/JdbcUtils.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/util/JdbcUtils.groovy @@ -26,6 +26,18 @@ import java.sql.ResultSet import java.sql.ResultSetMetaData class JdbcUtils { + static String replaceHostUrl(String originUri, String newHost) { + def prefix = originUri.substring(0, originUri.indexOf("://") + 3) + def postIndex = originUri.indexOf(":", originUri.indexOf("://") + 3) + if (postIndex == -1) { + postIndex = originUri.indexOf("/", originUri.indexOf("://") + 3) + } + if (postIndex == -1) { + postIndex = originUri.length() + } + return prefix + newHost + originUri.substring(postIndex) + } + static Tuple2<List<List<Object>>, ResultSetMetaData> executeToList(Connection conn, String sql) { conn.prepareStatement(sql).withCloseable { stmt -> boolean hasResultSet = stmt.execute() diff --git a/regression-test/framework/src/main/groovy/suite.gdsl b/regression-test/framework/src/main/groovy/suite.gdsl index bf8da34c2c6..80d52f6784b 100644 --- a/regression-test/framework/src/main/groovy/suite.gdsl +++ b/regression-test/framework/src/main/groovy/suite.gdsl @@ -51,6 +51,7 @@ bindAction("httpTest", "org.apache.doris.regression.action.HttpCliAction") bindAction("benchmark", "org.apache.doris.regression.action.BenchmarkAction") bindAction("waitForSchemaChangeDone", "org.apache.doris.regression.action.WaitForAction") bindAction("profile", "org.apache.doris.regression.action.ProfileAction") +bindAction("flightRecord", "org.apache.doris.regression.action.FlightRecordAction") // bind qt_xxx and order_qt_xxx methods contributor([suiteContext]) { diff --git a/regression-test/suites/demo_p0/test_flight_record.groovy b/regression-test/suites/demo_p0/test_flight_record.groovy new file mode 100644 index 00000000000..652faf31305 --- /dev/null +++ b/regression-test/suites/demo_p0/test_flight_record.groovy @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.apache.doris.regression.util.JdbcUtils + +// need nonConcurrent group +suite("test_flight_record", "nonConcurrent") { + // only support jdk17+ + + String version = System.getProperty("java.version") + if (version.startsWith("1.") || Integer.valueOf(version.split("\\.")[0]) < 17) { + logger.info("Only support jdk17+, current is ${version}, skip test") + return + } + + flightRecord { + // whether delete jfr file after callback, default is true + cleanUp true + + // the process name, default is DorisFE + processName "DorisFE" + + // the jcmd extra config, default is empty + extraConfig(["jdk.ObjectAllocationSample#throttle=\"100 /ns\""]) + + + // these sql will allocate some objects in localhost frontend, + // and we start to record flight in localhost frontend + record { + def localhostFrontendUrl = JdbcUtils.replaceHostUrl(context.config.jdbcUrl, "127.0.0.1") + connect(context.config.jdbcUser, context.config.jdbcPassword, localhostFrontendUrl) { + for (def i in 0..10) { + sql "select 100" + } + } + } + + // after record finished, we start to analyze the record events(items) + callback { items, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + boolean filterNereids = true + def allocationBytes = getAllocationBytes(items, filterNereids) + logger.info("allocation bytes: " + allocationBytes) + } + } +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org