This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/main by this push: new 77526b2 KYLIN-5076 Fix spark tracking url missing on spark standalone mode 77526b2 is described below commit 77526b253ad12e1c6f93520244a187f161e1638a Author: yaqian.zhang <598593...@qq.com> AuthorDate: Fri Sep 10 18:19:18 2021 +0800 KYLIN-5076 Fix spark tracking url missing on spark standalone mode --- .../engine/spark/application/SparkApplication.java | 8 ++++++-- .../org/apache/spark/deploy/StandaloneAppClient.scala | 18 +++++++++++++++++- .../scala/org/apache/spark/sql/SparderContext.scala | 6 ++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/application/SparkApplication.java b/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/application/SparkApplication.java index 0d4352b..362ef07 100644 --- a/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/application/SparkApplication.java +++ b/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/application/SparkApplication.java @@ -55,6 +55,7 @@ import org.apache.kylin.common.util.HadoopUtil; import org.apache.kylin.common.util.JsonUtil; import org.apache.kylin.metadata.MetadataConstants; import org.apache.spark.SparkConf; +import org.apache.spark.deploy.StandaloneAppClient; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.hive.utils.ResourceDetectUtils; import org.apache.spark.util.Utils; @@ -197,11 +198,14 @@ public abstract class SparkApplication { } } - private Map<String, String> getTrackingInfo(boolean ipAddressPreferred) { + private Map<String, String> getTrackingInfo(boolean ipAddressPreferred, String sparkMaster) { String applicationId = ss.sparkContext().applicationId(); Map<String, String> extraInfo = new HashMap<>(); try { String trackingUrl = getTrackingUrl(applicationId); + if (sparkMaster.startsWith("spark")) { + trackingUrl = StandaloneAppClient.getAppUrl(applicationId, sparkMaster); + } if (StringUtils.isBlank(trackingUrl)) { logger.warn("Get tracking url of application {}, but empty url found.", applicationId); return extraInfo; @@ -290,7 +294,7 @@ public abstract class SparkApplication { if (isJobOnCluster(sparkConf)) { updateSparkJobExtraInfo("/kylin/api/jobs/spark", project, jobId, - getTrackingInfo(config.isTrackingUrlIpAddressEnabled())); + getTrackingInfo(config.isTrackingUrlIpAddressEnabled(), sparkConf.get("spark.master"))); } // for spark metrics //JobMetricsUtils.registerListener(ss); diff --git a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/spark/deploy/StandaloneAppClient.scala b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/spark/deploy/StandaloneAppClient.scala index 15f456a..e42a76b 100644 --- a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/spark/deploy/StandaloneAppClient.scala +++ b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/spark/deploy/StandaloneAppClient.scala @@ -38,7 +38,6 @@ object StandaloneAppClient extends Logging { private val cacheTtl = 3600 * 1000 * 24 * 5 private val cacheMaxSize = 30000 - // private val masterUrlHtml: String = KylinConfig.getInstanceFromEnv.getSparkStandaloneMasterWebUI + "/app/?appId=" private val masterUrlJson: String = KylinConfig.getInstanceFromEnv.getSparkStandaloneMasterWebUI + "/json" private val restService: RestService = new RestService(10000, 10000) @@ -81,6 +80,23 @@ object StandaloneAppClient extends Logging { } } + def getAppUrl(appId: String, standaloneMaster: String): String = { + var sparkUI = KylinConfig.getInstanceFromEnv.getSparkStandaloneMasterWebUI + if (sparkUI.isEmpty) { + sparkUI = "http://" + getMasterHost(standaloneMaster) + ":8080/" + logWarning("Parameter 'kylin.engine.spark.standalone.master.httpUrl' is not configured. Use " + + sparkUI + " as the spark standalone Web UI address.") + } + if (!sparkUI.endsWith("/")) { + sparkUI = sparkUI + "/" + } + val sparkApp = sparkUI + "app/?appId=" + sparkApp + appId + } + + def getMasterHost(master: String): String = { + master.split("(://|:)").tail.head + } def parseApplicationState(responseStr: String): Unit = { val curr = System.currentTimeMillis() diff --git a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala index 5ae1961..47d0c9b 100644 --- a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala +++ b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala @@ -36,6 +36,8 @@ import org.apache.kylin.common.KylinConfig import org.apache.kylin.common.util.ToolUtil import org.apache.kylin.query.monitor.SparderContextCanary import org.apache.kylin.spark.classloader.ClassLoaderUtils +import org.apache.spark.deploy.StandaloneAppClient +import org.apache.spark.sql.SparderContext.master_app_url import org.apache.spark.{SparkConf, SparkContext, SparkEnv} import org.apache.spark.sql.execution.datasource.{KylinSourceStrategy, ShardFileStatusCache} import org.apache.spark.sql.metrics.SparderMetricsListener @@ -183,6 +185,10 @@ object SparderContext extends Logging { } master_app_url = "http://" + hostName + ":" + sparkSession.sparkContext.getConf .get("spark.ui.port", "4040") + case mode: String if mode.startsWith("spark") => + val hostName = StandaloneAppClient.getMasterHost(kylinConf.getSparderConfigOverrideWithSpecificName("spark.master")) + master_app_url = "http://" + hostName + ":" + sparkSession.sparkContext.getConf + .get("spark.ui.port", "4040") case _ => master_app_url = YarnInfoFetcherUtils.getTrackingUrl(appid) }