[kylin] branch main updated: Minor, using default log4j properties for spark driver and executor (#1891)

xxyu Tue, 21 Jun 2022 02:50:47 -0700

This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git



The following commit(s) were added to refs/heads/main by this push:
     new 857aaeaf3c Minor, using default log4j properties for spark driver and 
executor (#1891)
857aaeaf3c is described below

commit 857aaeaf3c6dba0ad2587761f917ddcc62844932
Author: Tengting Xu <34978943+muk...@users.noreply.github.com>
AuthorDate: Tue Jun 21 17:50:36 2022 +0800

    Minor, using default log4j properties for spark driver and executor (#1891)
    
    * using default log4j properties for spark driver and executor
    
    * Incompatible to hadoop 2.x env
    
    Revert "KYLIN-3610 bump the version of curator and zookeeper"
    
    This reverts commit 8cde6dda
    
    * minor, adapt default properties
---
 build/conf/spark-driver-log4j-default.properties   | 31 ++++++++++++++++++++++
 ...ies => spark-executor-log4j-default.properties} | 23 ++--------------
 build/conf/spark-executor-log4j.properties         |  2 +-
 .../org/apache/kylin/common/KylinConfigBase.java   | 25 +++++++++++++----
 .../src/main/resources/kylin-defaults.properties   |  8 ++++--
 .../kylin/engine/spark/job/NSparkExecutable.java   | 12 +++++++++
 pom.xml                                            |  4 +--
 7 files changed, 74 insertions(+), 31 deletions(-)

diff --git a/build/conf/spark-driver-log4j-default.properties 
b/build/conf/spark-driver-log4j-default.properties
new file mode 100644
index 0000000000..064ae0a082
--- /dev/null
+++ b/build/conf/spark-driver-log4j-default.properties
@@ -0,0 +1,31 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# It's called spark-driver-log4j-default.properties so that it won't distract 
users from the other more important log4j config file: 
kylin-server-log4j.properties
+# enable this by -Dlog4j.configuration=spark-driver-log4j-default.properties
+
+#overall config
+log4j.rootLogger=INFO,stderr
+log4j.logger.org.apache.kylin=DEBUG
+log4j.logger.org.springframework=WARN
+log4j.logger.org.apache.spark=WARN
+
+log4j.appender.stderr=org.apache.log4j.ConsoleAppender
+log4j.appender.stderr.layout=org.apache.kylin.common.logging.SensitivePatternLayout
+log4j.appender.stderr.target=System.err
+log4j.appender.stderr.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} : 
%m%n
diff --git a/build/conf/spark-executor-log4j.properties 
b/build/conf/spark-executor-log4j-default.properties
similarity index 58%
copy from build/conf/spark-executor-log4j.properties
copy to build/conf/spark-executor-log4j-default.properties
index fb5b7e3c1f..ef280d4e70 100644
--- a/build/conf/spark-executor-log4j.properties
+++ b/build/conf/spark-executor-log4j-default.properties
@@ -17,30 +17,11 @@
 #
 
 # It's called spark-executor-log4j.properties so that it won't distract users 
from the other more important log4j config file: kylin-server-log4j.properties
-# enable this by -Dlog4j.configuration=spark-executor-log4j.properties
-log4j.rootLogger=INFO,stderr,hdfs
+# enable this by -Dlog4j.configuration=spark-executor-log4j-default.properties
+log4j.rootLogger=INFO,stderr
 
 log4j.appender.stderr=org.apache.log4j.ConsoleAppender
 
log4j.appender.stderr.layout=org.apache.kylin.common.logging.SensitivePatternLayout
 log4j.appender.stderr.target=System.err
 #Don't add line number (%L) as it's too costly!
 log4j.appender.stderr.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} : 
%m%n
-
-
-log4j.appender.hdfs=org.apache.kylin.engine.spark.common.logging.SparkExecutorHdfsAppender
-
-log4j.appender.hdfs.hdfsWorkingDir=${kylin.hdfs.working.dir}
-log4j.appender.hdfs.metadataIdentifier=${kylin.metadata.identifier}
-log4j.appender.hdfs.category=${kylin.spark.category}
-log4j.appender.hdfs.identifier=${kylin.spark.identifier}
-log4j.appender.hdfs.jobName=${kylin.spark.jobName}
-log4j.appender.hdfs.project=${kylin.spark.project}
-
-log4j.appender.hdfs.rollingPeriod=5
-log4j.appender.hdfs.logQueueCapacity=5000
-#flushPeriod count as millis
-log4j.appender.hdfs.flushInterval=5000
-
-log4j.appender.hdfs.layout=org.apache.kylin.common.logging.SensitivePatternLayout
-#Don't add line number (%L) as it's too costly!
-log4j.appender.hdfs.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} : %m%n
\ No newline at end of file
diff --git a/build/conf/spark-executor-log4j.properties 
b/build/conf/spark-executor-log4j.properties
index fb5b7e3c1f..d176ce7940 100644
--- a/build/conf/spark-executor-log4j.properties
+++ b/build/conf/spark-executor-log4j.properties
@@ -43,4 +43,4 @@ log4j.appender.hdfs.flushInterval=5000
 
 
log4j.appender.hdfs.layout=org.apache.kylin.common.logging.SensitivePatternLayout
 #Don't add line number (%L) as it's too costly!
-log4j.appender.hdfs.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} : %m%n
\ No newline at end of file
+log4j.appender.hdfs.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} : %m%n
diff --git 
a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java 
b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index f1eb39c3d0..f800dfd21c 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -265,7 +265,6 @@ public abstract class KylinConfigBase implements 
Serializable {
     final protected void reloadKylinConfig(Properties properties) {
         this.properties = BCC.check(properties);
         setProperty("kylin.metadata.url.identifier", getMetadataUrlPrefix());
-        setProperty("kylin.log.spark-executor-properties-file", 
getLogSparkExecutorPropertiesFile());
     }
 
     private Map<Integer, String> convertKeyToInteger(Map<String, String> map) {
@@ -2521,11 +2520,27 @@ public abstract class KylinConfigBase implements 
Serializable {
     }
 
     public String getLogSparkDriverPropertiesFile() {
-        return getLogPropertyFile("spark-driver-log4j.properties");
+        return getLogPropertyFile(getLogSparkDriverProperties());
+    }
+
+    public boolean isDefaultLogSparkDriverProperties() {
+        return 
"spark-driver-log4j-default.properties".equals(getLogSparkDriverProperties());
+    }
+
+    public String getLogSparkDriverProperties() {
+        return getOptional("kylin.spark.driver.log4j.properties", 
"spark-driver-log4j-default.properties");
     }
 
     public String getLogSparkExecutorPropertiesFile() {
-        return getLogPropertyFile("spark-executor-log4j.properties");
+        return getLogPropertyFile(getLogSparkExecutorProperties());
+    }
+
+    public boolean isDefaultLogSparkExecutorProperties() {
+        return 
"spark-executor-log4j-default.properties".equals(getLogSparkExecutorProperties());
+    }
+
+    public String getLogSparkExecutorProperties() {
+        return getOptional("kylin.spark.executor.log4j.properties", 
"spark-executor-log4j-default.properties");
     }
 
     private String getLogPropertyFile(String filename) {
@@ -2627,14 +2642,14 @@ public abstract class KylinConfigBase implements 
Serializable {
                 String executorLogPath = "";
                 String driverLogPath = "";
                 File executorLogFile = 
FileUtils.findFile(KylinConfigBase.getKylinHome() + "/conf",
-                        "spark-executor-log4j.properties");
+                        getLogSparkExecutorProperties());
                 if (executorLogFile != null) {
                     executorLogPath = executorLogFile.getCanonicalPath();
                 }
                 path = executorLogPath;
                 if (isYarnCluster) {
                     File driverLogFile = 
FileUtils.findFile(KylinConfigBase.getKylinHome() + "/conf",
-                            "spark-driver-log4j.properties");
+                            getLogSparkDriverProperties());
                     if (driverLogFile != null) {
                         driverLogPath = driverLogFile.getCanonicalPath();
                     }
diff --git a/core-common/src/main/resources/kylin-defaults.properties 
b/core-common/src/main/resources/kylin-defaults.properties
index ae6c00f2a5..423ea90594 100644
--- a/core-common/src/main/resources/kylin-defaults.properties
+++ b/core-common/src/main/resources/kylin-defaults.properties
@@ -222,6 +222,10 @@ kylin.spark-conf.auto.prior=true
 # Read-Write separation deployment for Kylin 4, please check 
https://cwiki.apache.org/confluence/display/KYLIN/Read-Write+Separation+Deployment+for+Kylin+4.0
 #kylin.engine.submit-hadoop-conf-dir=
 
+# log4j properties file for spark
+kylin.spark.driver.log4j.properties=spark-driver-log4j-default.properties
+kylin.spark.executor.log4j.properties=spark-executor-log4j-default.properties
+
 # Spark conf (default is in spark/conf/spark-defaults.conf)
 kylin.engine.spark-conf.spark.master=yarn
 kylin.engine.spark-conf.spark.submit.deployMode=client
@@ -238,7 +242,7 @@ kylin.engine.spark-conf.spark.eventLog.enabled=true
 kylin.engine.spark-conf.spark.eventLog.dir=hdfs\:///kylin/spark-history
 
kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs\:///kylin/spark-history
 kylin.engine.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
-kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dfile.encoding=UTF-8 
-Dhdp.version=current -Dlog4j.configuration=spark-executor-log4j.properties 
-Dlog4j.debug -Dkylin.hdfs.working.dir=${hdfs.working.dir} 
-Dkylin.metadata.identifier=${kylin.metadata.url.identifier} 
-Dkylin.spark.category=job -Dkylin.spark.project=${job.project} 
-Dkylin.spark.identifier=${job.id} -Dkylin.spark.jobName=${job.stepId} 
-Duser.timezone=${user.timezone}
+kylin.engine.spark-conf.spark.executor.extraJavaOptions=-Dfile.encoding=UTF-8 
-Dhdp.version=current 
-Dlog4j.configuration=${kylin.spark.executor.log4j.properties} -Dlog4j.debug 
-Dkylin.hdfs.working.dir=${hdfs.working.dir} 
-Dkylin.metadata.identifier=${kylin.metadata.url.identifier} 
-Dkylin.spark.category=job -Dkylin.spark.project=${job.project} 
-Dkylin.spark.identifier=${job.id} -Dkylin.spark.jobName=${job.stepId} 
-Duser.timezone=${user.timezone}
 #kylin.engine.spark-conf.spark.sql.shuffle.partitions=1
 
 # manually upload spark-assembly jar to HDFS and then set this property will 
avoid repeatedly uploading jar at runtime
@@ -272,7 +276,7 @@ 
kylin.query.spark-conf.spark.serializer=org.apache.spark.serializer.JavaSerializ
 #kylin.query.spark-conf.spark.yarn.jars=hdfs://localhost:9000/spark2_jars/*
 kylin.query.spark-conf.spark.hadoop.yarn.timeline-service.enabled=false
 
-kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current 
-Dlog4j.configuration=spark-executor-log4j.properties -Dlog4j.debug 
-Dkylin.hdfs.working.dir=${kylin.env.hdfs-working-dir} 
-Dkylin.metadata.identifier=${kylin.metadata.url.identifier} 
-Dkylin.spark.category=sparder -Dkylin.spark.identifier={{APP_ID}}
+kylin.query.spark-conf.spark.executor.extraJavaOptions=-Dhdp.version=current 
-Dlog4j.configuration=${kylin.spark.executor.log4j.properties} -Dlog4j.debug 
-Dkylin.hdfs.working.dir=${kylin.env.hdfs-working-dir} 
-Dkylin.metadata.identifier=${kylin.metadata.url.identifier} 
-Dkylin.spark.category=sparder -Dkylin.spark.identifier={{APP_ID}}
 # uncomment for HDP
 #kylin.query.spark-conf.spark.driver.extraJavaOptions=-Dhdp.version=current
 #kylin.query.spark-conf.spark.yarn.am.extraJavaOptions=-Dhdp.version=current
diff --git 
a/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/job/NSparkExecutable.java
 
b/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/job/NSparkExecutable.java
index 89c136e199..af2314563d 100644
--- 
a/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/job/NSparkExecutable.java
+++ 
b/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/job/NSparkExecutable.java
@@ -447,6 +447,18 @@ public class NSparkExecutable extends AbstractExecutable {
     }
 
     private void wrapLog4jConf(StringBuilder sb, KylinConfig config) {
+        if (config.isDefaultLogSparkDriverProperties()) {
+            logger.info("Current using default log4j properties for spark 
driver in using `ConsoleAppender`." +
+                    "Please modify `kylin.spark.driver.log4j.properties` to be 
`spark-driver-log4j.properties`" +
+                    "for uploading log file to hdfs.");
+        }
+
+        if (config.isDefaultLogSparkExecutorProperties()) {
+            logger.info("Current using default log4j properties for spark 
executor in using `ConsoleAppender`." +
+                    "Please modify `kylin.spark.executor.log4j.properties` to 
be `spark-executor-log4j.properties`" +
+                    "for uploading log file to hdfs.");
+        }
+
         final String localLog4j = config.getLogSparkDriverPropertiesFile();
         final String log4jName = 
Paths.get(localLog4j).getFileName().toString();
         if (isYarnCluster) {
diff --git a/pom.xml b/pom.xml
index f26bf87264..c21a85fe22 100644
--- a/pom.xml
+++ b/pom.xml
@@ -102,8 +102,8 @@
     <avatica.version>1.12.0</avatica.version>
 
     <!-- Hadoop Common deps, keep compatible with hadoop2.version -->
-    <zookeeper.version>3.6.3</zookeeper.version>
-    <curator.version>5.2.1</curator.version>
+    <zookeeper.version>3.4.13</zookeeper.version>
+    <curator.version>2.12.0</curator.version>
     <jsr305.version>3.0.1</jsr305.version>
     <!-- kylin use shaded-guava, this version for unify guava version of other 
dependencies -->
     <guava.version>14.0</guava.version>

[kylin] branch main updated: Minor, using default log4j properties for spark driver and executor (#1891)

Reply via email to