This is an automated email from the ASF dual-hosted git repository.

xxyu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/main by this push:
     new 77526b2  KYLIN-5076 Fix spark tracking url missing on spark standalone mode
77526b2 is described below

commit 77526b253ad12e1c6f93520244a187f161e1638a
Author: yaqian.zhang <598593...@qq.com>
AuthorDate: Fri Sep 10 18:19:18 2021 +0800

    KYLIN-5076 Fix spark tracking url missing on spark standalone mode
---
 .../engine/spark/application/SparkApplication.java     |  8 ++++++--
 .../org/apache/spark/deploy/StandaloneAppClient.scala  | 18 +++++++++++++++++-
 .../scala/org/apache/spark/sql/SparderContext.scala    |  6 ++++++
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git 
a/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/application/SparkApplication.java
 
b/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/application/SparkApplication.java
index 0d4352b..362ef07 100644
--- 
a/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/application/SparkApplication.java
+++ 
b/kylin-spark-project/kylin-spark-engine/src/main/java/org/apache/kylin/engine/spark/application/SparkApplication.java
@@ -55,6 +55,7 @@ import org.apache.kylin.common.util.HadoopUtil;
 import org.apache.kylin.common.util.JsonUtil;
 import org.apache.kylin.metadata.MetadataConstants;
 import org.apache.spark.SparkConf;
+import org.apache.spark.deploy.StandaloneAppClient;
 import org.apache.spark.sql.SparkSession;
 import org.apache.spark.sql.hive.utils.ResourceDetectUtils;
 import org.apache.spark.util.Utils;
@@ -197,11 +198,14 @@ public abstract class SparkApplication {
         }
     }
 
-    private Map<String, String> getTrackingInfo(boolean ipAddressPreferred) {
+    private Map<String, String> getTrackingInfo(boolean ipAddressPreferred, 
String sparkMaster) {
         String applicationId = ss.sparkContext().applicationId();
         Map<String, String> extraInfo = new HashMap<>();
         try {
             String trackingUrl = getTrackingUrl(applicationId);
+            if (sparkMaster.startsWith("spark")) {
+                trackingUrl = StandaloneAppClient.getAppUrl(applicationId, 
sparkMaster);
+            }
             if (StringUtils.isBlank(trackingUrl)) {
                 logger.warn("Get tracking url of application {}, but empty url 
found.", applicationId);
                 return extraInfo;
@@ -290,7 +294,7 @@ public abstract class SparkApplication {
 
             if (isJobOnCluster(sparkConf)) {
                 updateSparkJobExtraInfo("/kylin/api/jobs/spark", project, 
jobId,
-                        
getTrackingInfo(config.isTrackingUrlIpAddressEnabled()));
+                        
getTrackingInfo(config.isTrackingUrlIpAddressEnabled(), 
sparkConf.get("spark.master")));
             }
             // for spark metrics
             //JobMetricsUtils.registerListener(ss);
diff --git 
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/spark/deploy/StandaloneAppClient.scala
 
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/spark/deploy/StandaloneAppClient.scala
index 15f456a..e42a76b 100644
--- 
a/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/spark/deploy/StandaloneAppClient.scala
+++ 
b/kylin-spark-project/kylin-spark-engine/src/main/scala/org/apache/spark/deploy/StandaloneAppClient.scala
@@ -38,7 +38,6 @@ object StandaloneAppClient extends Logging {
   private val cacheTtl = 3600 * 1000 * 24 * 5
   private val cacheMaxSize = 30000
 
-  // private val masterUrlHtml: String = 
KylinConfig.getInstanceFromEnv.getSparkStandaloneMasterWebUI + "/app/?appId="
   private val masterUrlJson: String = 
KylinConfig.getInstanceFromEnv.getSparkStandaloneMasterWebUI + "/json"
 
   private val restService: RestService = new RestService(10000, 10000)
@@ -81,6 +80,23 @@ object StandaloneAppClient extends Logging {
     }
   }
 
+  def getAppUrl(appId: String, standaloneMaster: String): String = {
+    var sparkUI = KylinConfig.getInstanceFromEnv.getSparkStandaloneMasterWebUI
+    if (sparkUI.isEmpty) {
+      sparkUI = "http://" + getMasterHost(standaloneMaster) + ":8080/"
+      logWarning("Parameter 'kylin.engine.spark.standalone.master.httpUrl' is 
not configured. Use " +
+        sparkUI + " as the spark standalone Web UI address.")
+    }
+    if (!sparkUI.endsWith("/")) {
+      sparkUI = sparkUI + "/"
+    }
+    val sparkApp = sparkUI + "app/?appId="
+    sparkApp + appId
+  }
+
+  def getMasterHost(master: String): String = {
+    master.split("(://|:)").tail.head
+  }
 
   def parseApplicationState(responseStr: String): Unit = {
     val curr = System.currentTimeMillis()
diff --git 
a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala
 
b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala
index 5ae1961..47d0c9b 100644
--- 
a/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala
+++ 
b/kylin-spark-project/kylin-spark-query/src/main/scala/org/apache/spark/sql/SparderContext.scala
@@ -36,6 +36,8 @@ import org.apache.kylin.common.KylinConfig
 import org.apache.kylin.common.util.ToolUtil
 import org.apache.kylin.query.monitor.SparderContextCanary
 import org.apache.kylin.spark.classloader.ClassLoaderUtils
+import org.apache.spark.deploy.StandaloneAppClient
+import org.apache.spark.sql.SparderContext.master_app_url
 import org.apache.spark.{SparkConf, SparkContext, SparkEnv}
 import org.apache.spark.sql.execution.datasource.{KylinSourceStrategy, 
ShardFileStatusCache}
 import org.apache.spark.sql.metrics.SparderMetricsListener
@@ -183,6 +185,10 @@ object SparderContext extends Logging {
                   }
                   master_app_url = "http://" + hostName + ":" + 
sparkSession.sparkContext.getConf
                     .get("spark.ui.port", "4040")
+                case mode: String if mode.startsWith("spark") =>
+                  val hostName = 
StandaloneAppClient.getMasterHost(kylinConf.getSparderConfigOverrideWithSpecificName("spark.master"))
+                  master_app_url = "http://" + hostName + ":" + 
sparkSession.sparkContext.getConf
+                    .get("spark.ui.port", "4040")
                 case _ =>
                   master_app_url = YarnInfoFetcherUtils.getTrackingUrl(appid)
               }

Reply via email to