This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/main by this push: new 857aaeaf3c Minor, using default log4j properties for spark driver and executor (#1891) 857aaeaf3c is described below commit 857aaeaf3c6dba0ad2587761f917ddcc62844932 Author: Tengting Xu <34978943+muk...@users.noreply.github.com> AuthorDate: Tue Jun 21 17:50:36 2022 +0800 Minor, using default log4j properties for spark driver and executor (#1891) * using default log4j properties for spark driver and executor * Incompatible to hadoop 2.x env Revert "KYLIN-3610 bump the version of curator and zookeeper" This reverts commit 8cde6dda * minor, adapt default properties --- build/conf/spark-driver-log4j-default.properties | 31 ++++++++++++++++++++++ ...ies => spark-executor-log4j-default.properties} | 23 ++-------------- build/conf/spark-executor-log4j.properties | 2 +- .../org/apache/kylin/common/KylinConfigBase.java | 25 +++++++++++++---- .../src/main/resources/kylin-defaults.properties | 8 ++++-- .../kylin/engine/spark/job/NSparkExecutable.java | 12 +++++++++ pom.xml | 4 +-- 7 files changed, 74 insertions(+), 31 deletions(-) diff --git a/build/conf/spark-driver-log4j-default.properties b/build/conf/spark-driver-log4j-default.properties new file mode 100644 index 0000000000..064ae0a082 --- /dev/null +++ b/build/conf/spark-driver-log4j-default.properties @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# It's called spark-driver-log4j-default.properties so that it won't distract users from the other more important log4j config file: kylin-server-log4j.properties +# enable this by -Dlog4j.configuration=spark-driver-log4j-default.properties + +#overall config +log4j.rootLogger=INFO,stderr +log4j.logger.org.apache.kylin=DEBUG +log4j.logger.org.springframework=WARN +log4j.logger.org.apache.spark=WARN + +log4j.appender.stderr=org.apache.log4j.ConsoleAppender +log4j.appender.stderr.layout=org.apache.kylin.common.logging.SensitivePatternLayout +log4j.appender.stderr.target=System.err +log4j.appender.stderr.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} : %m%n diff --git a/build/conf/spark-executor-log4j.properties b/build/conf/spark-executor-log4j-default.properties similarity index 58% copy from build/conf/spark-executor-log4j.properties copy to build/conf/spark-executor-log4j-default.properties index fb5b7e3c1f..ef280d4e70 100644 --- a/build/conf/spark-executor-log4j.properties +++ b/build/conf/spark-executor-log4j-default.properties @@ -17,30 +17,11 @@ # # It's called spark-executor-log4j.properties so that it won't distract users from the other more important log4j config file: kylin-server-log4j.properties -# enable this by -Dlog4j.configuration=spark-executor-log4j.properties -log4j.rootLogger=INFO,stderr,hdfs +# enable this by -Dlog4j.configuration=spark-executor-log4j-default.properties +log4j.rootLogger=INFO,stderr log4j.appender.stderr=org.apache.log4j.ConsoleAppender 
log4j.appender.stderr.layout=org.apache.kylin.common.logging.SensitivePatternLayout log4j.appender.stderr.target=System.err #Don't add line number (%L) as it's too costly! log4j.appender.stderr.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} : %m%n - - -log4j.appender.hdfs=org.apache.kylin.engine.spark.common.logging.SparkExecutorHdfsAppender - -log4j.appender.hdfs.hdfsWorkingDir=${kylin.hdfs.working.dir} -log4j.appender.hdfs.metadataIdentifier=${kylin.metadata.identifier} -log4j.appender.hdfs.category=${kylin.spark.category} -log4j.appender.hdfs.identifier=${kylin.spark.identifier} -log4j.appender.hdfs.jobName=${kylin.spark.jobName} -log4j.appender.hdfs.project=${kylin.spark.project} - -log4j.appender.hdfs.rollingPeriod=5 -log4j.appender.hdfs.logQueueCapacity=5000 -#flushPeriod count as millis -log4j.appender.hdfs.flushInterval=5000 - -log4j.appender.hdfs.layout=org.apache.kylin.common.logging.SensitivePatternLayout -#Don't add line number (%L) as it's too costly! -log4j.appender.hdfs.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} : %m%n \ No newline at end of file diff --git a/build/conf/spark-executor-log4j.properties b/build/conf/spark-executor-log4j.properties index fb5b7e3c1f..d176ce7940 100644 --- a/build/conf/spark-executor-log4j.properties +++ b/build/conf/spark-executor-log4j.properties @@ -43,4 +43,4 @@ log4j.appender.hdfs.flushInterval=5000 log4j.appender.hdfs.layout=org.apache.kylin.common.logging.SensitivePatternLayout #Don't add line number (%L) as it's too costly! 
-log4j.appender.hdfs.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} : %m%n \ No newline at end of file +log4j.appender.hdfs.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} : %m%n diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index f1eb39c3d0..f800dfd21c 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -265,7 +265,6 @@ public abstract class KylinConfigBase implements Serializable { final protected void reloadKylinConfig(Properties properties) { this.properties = BCC.check(properties); setProperty("kylin.metadata.url.identifier", getMetadataUrlPrefix()); - setProperty("kylin.log.spark-executor-properties-file", getLogSparkExecutorPropertiesFile()); } private Map<Integer, String> convertKeyToInteger(Map<String, String> map) { @@ -2521,11 +2520,27 @@ public abstract class KylinConfigBase implements Serializable { } public String getLogSparkDriverPropertiesFile() { - return getLogPropertyFile("spark-driver-log4j.properties"); + return getLogPropertyFile(getLogSparkDriverProperties()); + } + + public boolean isDefaultLogSparkDriverProperties() { + return "spark-driver-log4j-default.properties".equals(getLogSparkDriverProperties()); + } + + public String getLogSparkDriverProperties() { + return getOptional("kylin.spark.driver.log4j.properties", "spark-driver-log4j-default.properties"); } public String getLogSparkExecutorPropertiesFile() { - return getLogPropertyFile("spark-executor-log4j.properties"); + return getLogPropertyFile(getLogSparkExecutorProperties()); + } + + public boolean isDefaultLogSparkExecutorProperties() { + return "spark-executor-log4j-default.properties".equals(getLogSparkExecutorProperties()); + } + + public String getLogSparkExecutorProperties() { + return getOptional("kylin.spark.executor.log4j.properties", 
"spark-executor-log4j-default.properties"); } private String getLogPropertyFile(String filename) { @@ -2627,14 +2642,14 @@ public abstract class KylinConfigBase implements Serializable { String executorLogPath = ""; String driverLogPath = ""; File executorLogFile = FileUtils.findFile(KylinConfigBase.getKylinHome() + "/conf", - "spark-executor-log4j.properties"); + getLogSparkExecutorProperties()); if (executorLogFile != null) { executorLogPath = executorLogFile.getCanonicalPath(); } path = executorLogPath; if (isYarnCluster) { File driverLogFile = FileUtils.findFile(KylinConfigBase.getKylinHome() + "/conf", - "spark-driver-log4j.properties"); + getLogSparkDriverProperties()); if (driverLogFile != null) { driverLogPath = driverLogFile.getCanonicalPath(); } diff --git a/core-common/src/main/resources/kylin-defaults.properties b/core-common/src/main/resources/kylin-defaults.properties index ae6c00f2a5..423ea90594 100644 --- a/core-common/src/main/resources/kylin-defaults.properties +++ b/core-common/src/main/resources/kylin-defaults.properties @@ -222,6 +222,10 @@ kylin.spark-conf.auto.prior=true # Read-Write separation deployment for Kylin 4, please check https://cwiki.apache.org/confluence/display/KYLIN/Read-Write+Separation+Deployment+for+Kylin+4.0 #kylin.engine.submit-hadoop-conf-dir= +# log4j properties file for spark +kylin.spark.driver.log4j.properties=spark-driver-log4j-default.properties +kylin.spark.executor.log4j.properties=spark-executor-log4j-default.properties + # Spark conf (default is in spark/conf/spark-defaults.conf) kylin.engine.spark-conf.spark.master=yarn kylin.engine.spark-conf.spark.submit.deployMode=client @@ -238,7 +242,7 @@ kylin.engine.spark-conf.spark.eventLog.enabled=true kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false 
-kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dfile.encoding=UTF-8 -Dhdp.version=current -Dlog4j.configuration=spark-executor-log4j.properties -Dlog4j.debug -Dkylin.hdfs.working.dir=${hdfs.working.dir} -Dkylin.metadata.identifier=${kylin.metadata.url.identifier} -Dkylin.spark.category=job -Dkylin.spark.project=${job.project} -Dkylin.spark.identifier=${job.id} -Dkylin.spark.jobName=${job.stepId} -Duser.timezone=${user.timezone} +kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dfile.encoding=UTF-8 -Dhdp.version=current -Dlog4j.configuration=${kylin.spark.executor.log4j.properties} -Dlog4j.debug -Dkylin.hdfs.working.dir=${hdfs.working.dir} -Dkylin.metadata.identifier=${kylin.metadata.url.identifier} -Dkylin.spark.category=job -Dkylin.spark.project=${job.project} -Dkylin.spark.identifier=${job.id} -Dkylin.spark.jobName=${job.stepId} -Duser.timezone=${user.timezone} #kylin.engine.spark-conf.spark.sql.shuffle.partitions=1 # manually upload spark-assembly jar to HDFS and then set this property will avoid repeatedly uploading jar at runtime @@ -272,7 +276,7 @@ kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializ #kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/* kylin.query.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false -kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current -Dlog4j.configuration=spark-executor-log4j.properties -Dlog4j.debug -Dkylin.hdfs.working.dir=${kylin.env.hdfs-working-dir} -Dkylin.metadata.identifier=${kylin.metadata.url.identifier} -Dkylin.spark.category=sparder -Dkylin.spark.identifier={{APP_ID}} +kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current -Dlog4j.configuration=${kylin.spark.executor.log4j.properties} -Dlog4j.debug -Dkylin.hdfs.working.dir=${kylin.env.hdfs-working-dir} -Dkylin.metadata.identifier=${kylin.metadata.url.identifier} -Dkylin.spark.category=sparder -Dkylin.spark.identifier={{APP_ID}} # uncomment 
for HDP #kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current #kylin.query.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current diff --git a/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/job/NSparkExecutable.java b/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/job/NSparkExecutable.java index 89c136e199..af2314563d 100644 --- a/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/job/NSparkExecutable.java +++ b/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/job/NSparkExecutable.java @@ -447,6 +447,18 @@ public class NSparkExecutable extends AbstractExecutable { } private void wrapLog4jConf(StringBuilder sb, KylinConfig config) { + if (config.isDefaultLogSparkDriverProperties()) { + logger.info("Current using default log4j properties for spark driver in using `ConsoleAppender`." + + "Please modify `kylin.spark.driver.log4j.properties` to be `spark-driver-log4j.properties`" + + "for uploading log file to hdfs."); + } + + if (config.isDefaultLogSparkExecutorProperties()) { + logger.info("Current using default log4j properties for spark executor in using `ConsoleAppender`." 
+ + "Please modify `kylin.spark.executor.log4j.properties` to be `spark-executor-log4j.properties`" + + "for uploading log file to hdfs."); + } + final String localLog4j = config.getLogSparkDriverPropertiesFile(); final String log4jName = Paths.get(localLog4j).getFileName().toString(); if (isYarnCluster) { diff --git a/pom.xml b/pom.xml index f26bf87264..c21a85fe22 100644 --- a/pom.xml +++ b/pom.xml @@ -102,8 +102,8 @@ <avatica.version>1.12.0</avatica.version> <!-- Hadoop Common deps, keep compatible with hadoop2.version --> - <zookeeper.version>3.6.3</zookeeper.version> - <curator.version>5.2.1</curator.version> + <zookeeper.version>3.4.13</zookeeper.version> + <curator.version>2.12.0</curator.version> <jsr305.version>3.0.1</jsr305.version> <!-- kylin use shaded-guava, this version for unify guava version of other dependencies --> <guava.version>14.0</guava.version>