Author: lidong
Date: Fri Mar 18 14:13:30 2022
New Revision: 1899035

URL: http://svn.apache.org/viewvc?rev=1899035&view=rev
Log:
# add blog: kylin4 now is supporting aws glue

Added:
    kylin/site/blog/2022/03/
    kylin/site/blog/2022/03/17/
    kylin/site/blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/
    kylin/site/blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/index.html
    kylin/site/cn_blog/2022/03/
    kylin/site/cn_blog/2022/03/17/
    kylin/site/cn_blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/
    
kylin/site/cn_blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/index.html
    kylin/site/images/blog/kylin4_support_aws_glue/
    
kylin/site/images/blog/kylin4_support_aws_glue/10_kylin_start_up_script_en.png  
 (with props)
    
kylin/site/images/blog/kylin4_support_aws_glue/11_kylin_start_up_script_en.png  
 (with props)
    kylin/site/images/blog/kylin4_support_aws_glue/12_start_kylin_en.png   
(with props)
    kylin/site/images/blog/kylin4_support_aws_glue/13_start_kylin_en.png   
(with props)
    kylin/site/images/blog/kylin4_support_aws_glue/14_load_glue_meta_en.png   
(with props)
    kylin/site/images/blog/kylin4_support_aws_glue/15_load_glue_meta_en.png   
(with props)
    kylin/site/images/blog/kylin4_support_aws_glue/16_load_glue_meta_en.png   
(with props)
    kylin/site/images/blog/kylin4_support_aws_glue/17_verify_query_en.png   
(with props)
    
kylin/site/images/blog/kylin4_support_aws_glue/1_prepare_aws_glue_table_en.png  
 (with props)
    
kylin/site/images/blog/kylin4_support_aws_glue/2_prepare_aws_glue_table_en.png  
 (with props)
    
kylin/site/images/blog/kylin4_support_aws_glue/3_prepare_hadoop_cluster_en.png  
 (with props)
    
kylin/site/images/blog/kylin4_support_aws_glue/4_prepare_hadoop_cluster_en.png  
 (with props)
    kylin/site/images/blog/kylin4_support_aws_glue/5_test_sparksql_glue_en.png  
 (with props)
    kylin/site/images/blog/kylin4_support_aws_glue/6_test_sparksql_glue_en.png  
 (with props)
    kylin/site/images/blog/kylin4_support_aws_glue/7_test_sparksql_glue_en.png  
 (with props)
    
kylin/site/images/blog/kylin4_support_aws_glue/8_kylin_start_up_script_en.png   
(with props)
    
kylin/site/images/blog/kylin4_support_aws_glue/9_kylin_start_up_script_en.png   
(with props)
Modified:
    kylin/site/blog/index.html
    kylin/site/cn/blog/index.html
    kylin/site/feed.xml

Added: 
kylin/site/blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/index.html
URL: 
http://svn.apache.org/viewvc/kylin/site/blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/index.html?rev=1899035&view=auto
==============================================================================
--- 
kylin/site/blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/index.html 
(added)
+++ 
kylin/site/blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/index.html 
Fri Mar 18 14:13:30 2022
@@ -0,0 +1,638 @@
+<!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+<!doctype html>
+<html>
+       <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>Apache Kylin | Kylin 4 now is supporting AWS Glue Catalog</title>
+  <meta name="description" content="Why does installing Kylin on EMR need to 
support AWS Glue?">
+  <meta name="author"      content="Apache Kylin">
+  <link rel="shortcut icon" href="fav.png" type="image/png">
+
+
+
+<link rel="stylesheet" href="/assets/css/animate.css">
+<!-- Bootstrap -->
+<link rel="stylesheet" href="/assets/css/bootstrap.min.css">
+
+<!-- Fonts -->
+<!-- <link rel="stylesheet" 
href="http://fonts.googleapis.com/css?family=Alice|Open+Sans:400,300,700"> -->
+
+<!-- Icons -->
+<link rel="stylesheet" href="/assets/css/font-awesome.min.css">
+
+  <!-- Custom styles -->
+  <link rel="stylesheet" href="/assets/css/styles.css">
+  <link rel="stylesheet" href="/assets/css/docs.css">
+  <link rel="stylesheet" href="/assets/css/pygments.css">
+
+  <link rel="canonical" 
href="http://kylin.apache.org/blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/">
+  <link rel="alternate" type="application/rss+xml" title="Apache Kylin" 
href="http://kylin.apache.org/feed.xml" />
+
+<!--[if lt IE 9]> <script src="assets/js/html5shiv.js"></script> <![endif]-->
+<!-- Global site tag (gtag.js) - Google Analytics -->
+<script async 
src="https://www.googletagmanager.com/gtag/js?id=UA-120788561-1"></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments);}
+  gtag('js', new Date());
+
+  gtag('config', 'UA-120788561-1');
+</script>
+<script type="text/javascript" src="/assets/js/jquery-1.9.1.min.js"></script>
+<script type="text/javascript" src="/assets/js/nside.js"></script>
+<script type="text/javascript" src="/assets/js/nnav.js"></script>
+<script>
+var _hmt = _hmt || [];
+(function() {
+  var hm = document.createElement("script");
+  hm.src = "https://hm.baidu.com/hm.js?bdc5e03add430c0b72cc0eb91eabfa99";
+  var s = document.getElementsByTagName("script")[0]; 
+  s.parentNode.insertBefore(hm, s);
+})();
+</script>
+
+</head>
+
+       <body>
+               <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<header id="header" >
+  
+  <!-- Main Menu -->
+  <nav class="navbar navbar-default" role="navigation" id="nav-wrapper">
+    <div class="container-fluid" id="nav">
+      <!--
+      <img class="img-circle" width="40px" height="40px" id="circlelogo" 
src="/assets/images/kylin_logo.jpg">
+      -->
+      <!-- Brand and toggle get grouped for better mobile display -->
+      <div class="navbar-header">
+        <img class="navbar-logo" width="46" 
src="/assets/images/kylin_logo.png" ></img>
+        <button type="button" class="navbar-toggle collapsed" 
data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
+          <span class="sr-only">Toggle navigation</span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+        </button>
+        <ul class="nav icon-navbar">
+            <li><a href="https://twitter.com/apachekylin" target="_blank" 
class="fa fa-twitter fa-lg"  title="Twitter: @ApacheKylin" ></a></li>
+            <li><a href="https://github.com/apache/kylin" target="_blank" 
class="fa fa-github-alt fa-lg" title="Github: apache/kylin" ></a></li>
+            <li><a href="https://www.facebook.com/kylinio" target="_blank" 
class="fa fa-facebook fa-lg" title="Facebook: kylin.io" ></a></li>
+        </ul>
+      </div>
+
+      <!-- Collect the nav links, forms, and other content for toggling -->
+      <div class="navbar-collapse collapse" id="bs-example-navbar-collapse-1">
+
+        <ul class="nav navbar-nav">
+
+          <li><a href="/">Home</a></li>
+          <li>
+            <a href="/docs" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-haspopup="true" aria-expanded="false">Docs<span 
class="caret"></span></a>
+            <ul class="dropdown-menu">
+              <li><a href="/docs/">Latest Release(Kylin 4.0.1)</a></li>
+              <li><a href="/docs31/">Kylin 3.1.3</a></li>
+              <li><a href="/docs24/">Kylin 2.4.0</a></li>
+              <li><a href="/archive/">Archive</a></li>
+            </ul>
+          </li>
+          <li><a href="/download">Download</a></li>
+          <li><a href="/community" >Community</a></li>
+          <li>
+            <a href="/development" class="dropdown-toggle" 
data-toggle="dropdown" role="button" aria-haspopup="true" 
aria-expanded="false">Development<span class="caret"></span></a>
+            <ul class="dropdown-menu">
+              <li><a href="/development40/">Kylin 4.x</a></li>
+              <li><a href="/development/">Kylin 3.x And Older Versions</a></li>
+            </ul>
+          </li>
+          <li><a href="/blog">Blog</a></li>
+          <li><a href="/cn" >中文版</a></li> 
+        </ul>     
+      </div><!-- /.navbar-collapse -->
+    </div><!-- /.container-fluid -->
+  </nav>
+
+  <div id="head" class="parallax normal-header" >
+    <div class="text-center header-apache">
+      <a href="http://apache.org/foundation/contributing.html" title="Support 
Apache" style="margin-left: 150px;">
+        <div>
+          <img src="https://www.apache.org/images/SupportApache-small.png" >
+        </div>
+      </a>
+    </div>  
+  </div>
+  
+ </header>
+
+               <div class="page-content main">
+                       <header style=" padding:2em 0 0 ">
+                       <div class="container" >
+                         <div style=" padding:0 4em">
+                <div class="blog-icon">
+                  <img width="30" src="/assets/images/icon_blog_w.png">
+                </div>
+                               <h4 class="index-title" style=" 
float:left;"><span>Apache Kylin™ Technical Blog</span></h4>
+                         </div>
+                       </div>
+               </div>
+
+               <div class="container blog">
+                       <div>
+                               <article class="post-content" > 
+                               <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<div class="post" style=" padding:2em 4em 4em 4em">
+
+  <header class="post-header">
+    <h1 class="post-title">Kylin 4 now is supporting AWS Glue Catalog</h1>
+    <p class="post-meta" >Mar 17, 2022 • Xiaoxiang Yu</p>
+  </header>
+
+  <article class="post-content" >
+    <h2 id="why-does-installing-kylin-on-emr-need-to-support-aws-glue">Why 
does installing Kylin on EMR need to support AWS Glue?</h2>
+
+<h3 id="what-is-aws-glue">What is AWS Glue?</h3>
+
+<p>AWS Glue is a fully hosted ETL (Extract, Transform, and Load) service that 
enables AWS users to easily and cost-effectively classify, cleanse, enrich data 
and move data between various data storages. AWS Glue consists of a central 
metastore called AWS Glue Data Catalog, an ETL engine that can automatically 
generate code and a flexible scheduler that can handle dependency resolution, 
monitor jobs and retry. AWS Glue is a serverless service, so there is no 
infrastructure to set up or manage.</p>
+
+<h3 id="why-does-kylin-need-aws-glue-catalog">Why does Kylin need AWS Glue 
Catalog?</h3>
+
+<p>At present, many users in the Kylin community use AWS EMR for running 
large-scale distributed data processing jobs on Hadoop, Spark, Hive, Presto, 
etc. Without AWS Glue Data Catalog, tables built on these data warehouse 
components (like Hive, Spark and Presto) can not be used by any other 
components. As the data warehouse needs to answer requirements from various 
business departments, they use AWS Glue Data Catalog for metadata storage when 
creating the AWS EMR clusters, to share the data sources among different 
components and business departments. That is, to build one data cube with data 
from each business department, so they can provide quick responses to different 
business requirements.<br />
+In modern companies, data is saved on cloud object storage and big data teams 
use AWS EMR for data processing, data analysis and model training. But with 
data explosion, it becomes really difficult to extract data and the response 
time is too long. In other words, the solution of EMR + Spark/Hive cannot meet 
the speedy data query requirements from data analysts, O&amp;M personnel and 
sales. So some users turn to Apache Kylin as their open-source OLAP 
solution.<br />
+Recently, our users approached us with the request that Kylin 4 could directly 
read table metadata from AWS Glue. After some collaboration, now Kylin 4 
supports AWS Glue Catalog, making it possible for tables and data to be shared 
among Hive, Presto, Spark and Kylin. This helps to break down the metadata 
barrier, so different topics can be combined to form a big data analysis 
platform.</p>
+
+<h3 id="does-kylin-support-aws-glue">Does Kylin support AWS Glue?</h3>
+
+<table>
+  <thead>
+    <tr>
+      <th> </th>
+      <th>Kylin version which supports Glue</th>
+      <th>Issue Link</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>Kylin on HBase (Before Kylin 4)</td>
+      <td>2.6.6 or higher<br />3.1.0 or higher</td>
+      <td>https://issues.apache.org/jira/browse/KYLIN-4206<br 
/>https://zhuanlan.zhihu.com/p/99481373</td>
+    </tr>
+    <tr>
+      <td>Kylin on Parquet</td>
+      <td>4.0.1 or higher</td>
+      <td>This article.</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="prerequisites-for-deployment">Prerequisites for deployment</h2>
+
+<h3 id="software-version">Software Version</h3>
+
+<table>
+  <thead>
+    <tr>
+      <th><strong>Software</strong></th>
+      <th><strong>Version</strong></th>
+      <th>Reference</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>Apache Kylin</td>
+      <td>4.0.1 or higher</td>
+      <td><a 
href="https://cwiki.apache.org/confluence/display/KYLIN/KIP+10+refactor+hive+and+hadoop+dependency">KIP
 10 refactor hive and hadoop dependency</a>.</td>
+    </tr>
+    <tr>
+      <td>AWS EMR</td>
+      <td>6.5.0 or higher<br />5.33.1 or higher</td>
+      <td><a 
href="https://docs.amazonaws.cn/en_us/emr/latest/ReleaseGuide/emr-650-release.html">Amazon
 EMR release 6.5.0 - Amazon EMR</a>.</td>
+    </tr>
+  </tbody>
+</table>
+
+<h3 id="prepare-aws-glue-database-and-tables">Prepare AWS Glue database and 
tables</h3>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/1_prepare_aws_glue_table_en.png" 
alt="" /></p>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/2_prepare_aws_glue_table_en.png" 
alt="" /></p>
+
+<ul>
+  <li>Create an EMR cluster.</li>
+</ul>
+
+<p>Note: Parameter hive.metastore.client.factory.class is configured to enable 
AWS Glue. For details, you may refer to the commands below.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>aws emr 
create-cluster --applications <span class="nv">Name</span><span 
class="o">=</span>Hadoop <span class="nv">Name</span><span 
class="o">=</span>Hive <span class="nv">Name</span><span 
class="o">=</span>Spark <span class="nv">Name</span><span 
class="o">=</span>ZooKeeper <span class="nv">Name</span><span 
class="o">=</span>Tez <span class="nv">Name</span><span 
class="o">=</span>Ganglia <span class="se">\</span>
+  --ec2-attributes <span class="k">${}</span> <span class="se">\</span>
+  --release-label emr-6.5.0 <span class="se">\</span>
+  --log-uri <span class="k">${}</span> <span class="se">\</span>
+  --instance-groups <span class="k">${}</span> <span class="se">\</span>
+  --configurations <span 
class="s1">'[{"Classification":"hive-site","Properties":{"hive.metastore.client.factory.class":"com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"}}]'</span>
 <span class="se">\</span>
+  --auto-scaling-role EMR_AutoScaling_DefaultRole <span class="se">\</span>
+  --ebs-root-volume-size 100 <span class="se">\</span>
+  --service-role EMR_DefaultRole <span class="se">\</span>
+  --enable-debugging <span class="se">\</span>
+  --name <span class="s1">'Kylin4_on_EMR65_with_Glue'</span> <span 
class="se">\</span>
+  --region cn-northwest-1
+</code></pre>
+</div>
+
+<ul>
+  <li>Log in to the Master node. Check the Hadoop version and whether the 
Hadoop cluster is successfully started.</li>
+</ul>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/3_prepare_hadoop_cluster_en.png" 
alt="" /></p>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/4_prepare_hadoop_cluster_en.png" 
alt="" /></p>
+
+<h3 id="optionalget-environmental-information">(Optional) Get environment 
information</h3>
+
+<blockquote>
+  <p>If you are using RDS or other metadata storage, you may skip this 
step.</p>
+</blockquote>
+
+<p>RDBMS is recommended for metastore in Kylin 4. So for testing purposes, in 
this article, we use MariaDB which comes with the Master node for metastore; 
for hostname, account and password of MariaDB, see <code 
class="highlighter-rouge">/etc/hive/conf/hive-site.xml</code>.</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>kylin.metadata.url<span 
class="o">=</span>kylin4_on_cloud@jdbc,url<span 
class="o">=</span>jdbc:mysql://<span class="k">${</span><span 
class="nv">HOSTNAME</span><span class="k">}</span>:3306/hue,username<span 
class="o">=</span>hive,password<span class="o">=</span><span 
class="k">${</span><span class="nv">PASSWORD</span><span 
class="k">}</span>,maxActive<span class="o">=</span>10,maxIdle<span 
class="o">=</span>10,driverClassName<span 
class="o">=</span>org.mariadb.jdbc.Driver  
+kylin.env.zookeeper-connect-string<span class="o">=</span><span 
class="k">${</span><span class="nv">HOSTNAME</span><span class="k">}</span>
+</code></pre>
+</div>
+
+<p>Configure the variables as per the actual information, for example, replace 
 ${PASSWORD} with the real password, save it locally and it will be used to 
start Kylin.</p>
+
+<h3 id="test-the-connectivity-between-spark-sql-and-aws-glue">Test the 
connectivity between Spark SQL and AWS Glue</h3>
+
+<p>Test whether AWS Spark SQL can access databases and table metadata through 
AWS Glue with Spark-SQL. For the first test, you will find that the startup 
fails with an error.</p>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/5_test_sparksql_glue_en.png" 
alt="" /></p>
+
+<p>Replace <code class="highlighter-rouge">hive-site.xml</code> used by Spark 
with the following commands.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span 
class="nb">cd</span> /etc/spark/conf
+sudo mv hive-site.xml hive-site.xml.bak
+sudo cp /etc/hive/conf/hive-site.xml .
+</code></pre>
+</div>
+
+<p>Then change the value of <code 
class="highlighter-rouge">hive.execution.engine</code> in file <code 
class="highlighter-rouge">/etc/spark/conf/hive-site.xml</code> to <code 
class="highlighter-rouge">mr</code>, restart Spark-SQL CLI and verify whether 
the query for AWS Glue’s table data is successful.</p>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/6_test_sparksql_glue_en.png" 
alt="" /></p>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/7_test_sparksql_glue_en.png" 
alt="" /></p>
+
+<h3 id="optional-prepare-kylin-spark-enginejar">(Optional) Prepare 
kylin-spark-engine.jar</h3>
+
+<blockquote>
+  <p>This issue will be fixed in Apache Kylin 4.0.2. So you can skip this step 
after updating to Apache Kylin 4.0.2. For users with Kylin 4.0.1, please refer 
to the following steps to replace kylin-spark-engine.jar:</p>
+</blockquote>
+
+<p>Clone Kylin git repository, execute <code class="highlighter-rouge">mvn 
clean package -DskipTests</code> to build a new <code 
class="highlighter-rouge">kylin-spark-project/kylin-spark-engine/target/kylin-spark-engine-4.0.0-SNAPSHOT.jar</code>
 .</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>git clone 
https://github.com/hit-lacus/kylin.git
+<span class="nb">cd </span>kylin
+git checkout KYLIN-5160
+mvn clean package -DskipTests
+
+<span class="c"># find -name kylin-spark-engine-4.0.0-SNAPSHOT.jar 
kylin-spark-project/kylin-spark-engine/target</span>
+</code></pre>
+</div>
+
+<p>Patch link: <a 
href="https://github.com/apache/kylin/pull/1819">https://github.com/apache/kylin/pull/1819</a></p>
+
+<h2 id="deploy-kylin-and-connect-to-aws-glue">Deploy Kylin and connect to AWS 
Glue</h2>
+
+<h3 id="download-kylin">Download Kylin</h3>
+
+<ol>
+  <li>
+    <p>Download and decompress Kylin. Please download the corresponding Kylin 
package according to your EMR version. That is, with EMR 5.X you can download 
Spark 2 package; with EMR 6.X you can download Spark 3 package.<br />
+ <code class="highlighter-rouge">shell
+ # aws s3 cp s3://${BUCKET}/apache-kylin-4.0.1-bin-spark3.tar.gz .
+ # wget apache-kylin-4.0.1-bin-spark3.tar.gz
+ tar zxvf apache-kylin-4.0.1-bin-spark3.tar.gz
+ cd apache-kylin-4.0.1-bin-spark3
+ export KYLIN_HOME=/home/hadoop/apache-kylin-4.0.1-bin-spark3
+</code></p>
+  </li>
+  <li>
+    <p>(Optional) Get MariaDB driver jar<br />
+ &gt; If you are using other databases for metastore, please skip this 
step.</p>
+
+    <p><code class="highlighter-rouge">shell
+ cd $KYLIN_HOME
+ mkdir ext
+ cp /usr/lib/hive/lib/mariadb-connector-java.jar $KYLIN_HOME/ext
+</code></p>
+  </li>
+</ol>
+
+<h3 id="prepare-spark">Prepare Spark</h3>
+
+<p>AWS Spark has built-in support of AWS Glue, so you will use AWS Spark when 
loading table metadata and building jobs. Kylin 4.0.1 supports Apache Spark 
officially. Because the compatibility between Apache Spark and AWS Spark is not 
very good, we will use Apache Spark for cube queries. To sum up, you need to 
switch between AWS Spark and Apache Spark according to your task (query task or 
build task).</p>
+
+<ul>
+  <li>Prepare AWS Spark</li>
+</ul>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span 
class="nb">cd</span> <span class="nv">$KYLIN_HOME</span>
+mkdir ext
+cp /usr/lib/hive/lib/mariadb-connector-java.jar <span 
class="nv">$KYLIN_HOME</span>/ext
+</code></pre>
+</div>
+
+<ul>
+  <li>Download Apache Spark
+    <ul>
+      <li>Please download the corresponding Spark installation package 
according to your EMR version. That is, with EMR 5.X you can download Spark 
2.4.7 and with EMR 6.X you can download Spark 3.1.2.<br />
+<code class="highlighter-rouge">shell
+cd $KYLIN_HOME
+aws s3 cp s3://${BUCKET}/spark-2.4.7-bin-hadoop2.7.tgz $KYLIN_HOME # Or 
download spark-2.4.7-bin-hadoop2.7.tgz from the official website
+tar zxvf spark-2.4.7-bin-hadoop2.7.tgz
+mv spark-2.4.7-bin-hadoop2.7 spark-apache
+</code></li>
+    </ul>
+  </li>
+  <li>First, you need to load AWS Glue table, so direct <code 
class="highlighter-rouge">$KYLIN_HOME/spark</code> to AWS Spark with soft link. 
Note: you do not need to set up <code 
class="highlighter-rouge">SPARK_HOME</code>, because if <code 
class="highlighter-rouge">$KYLIN_HOME/spark</code> exists and <code 
class="highlighter-rouge">SPARK_HOME</code> is not set up, Kylin will use <code 
class="highlighter-rouge">$KYLIN_HOME/spark</code> as <code 
class="highlighter-rouge">SPARK_HOME</code> by default.</li>
+</ul>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>ln -s spark-aws 
spark
+</code></pre>
+</div>
+
+<h3 id="modify-kylin-startup-script">Modify Kylin startup script</h3>
+
+<ol>
+  <li>Start Spark SQL CLI and keep it in running status.</li>
+  <li>
+    <p>Acquire PID of <code class="highlighter-rouge">SparkSQLCLIDriver</code> 
with <code class="highlighter-rouge">jps -ml</code>. Then acquire <code 
class="highlighter-rouge">spark.driver.extraClassPath</code> of 
<strong>Driver</strong>. Or, you can acquire these from 
/etc/spark/conf/spark-defaults.conf.<br />
+ <code class="highlighter-rouge">shell
+ jps -ml | grep SparkSubmit
+ jinfo ${PID} | grep "spark.driver.extraClassPath"
+</code><br />
+ <img 
src="/images/blog/kylin4_support_aws_glue/8_kylin_start_up_script_en.png" 
alt="" /></p>
+  </li>
+  <li>Edit <code class="highlighter-rouge">bin/kylin.sh</code>, modify <code 
class="highlighter-rouge">KYLIN_TOMCAT_CLASSPATH</code>  and add <code 
class="highlighter-rouge">kylin_driver_classpath</code>; save bin/kylin.sh, 
then exit Spark SQL CLI.</li>
+</ol>
+
+<ul>
+  <li>kylin.sh before modifying</li>
+</ul>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/9_kylin_start_up_script_en.png" 
alt="" /></p>
+
+<ul>
+  <li>For EMR 6.5.0, in the modified <code 
class="highlighter-rouge">kylin.sh</code>, <code 
class="highlighter-rouge">kylin_driver_classpath</code> is at the end of the 
code.</li>
+</ul>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/10_kylin_start_up_script_en.png" 
alt="" /></p>
+
+<ul>
+  <li>For EMR 5.33.1, in the modified <code 
class="highlighter-rouge">kylin.sh</code>, <code 
class="highlighter-rouge">kylin_driver_classpath</code> is placed before <code 
class="highlighter-rouge">$SPARK_HOME/jars</code>.</li>
+</ul>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/11_kylin_start_up_script_en.png" 
alt="" /></p>
+
+<h3 id="configure-kylin">Configure Kylin</h3>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span 
class="nb">cd</span> <span class="nv">$KYLIN_HOME</span>
+vim conf/kylin.properties 
+</code></pre>
+</div>
+
+<h4 id="minimal-kylin-configuration">Minimal Kylin Configuration</h4>
+
+<table>
+  <thead>
+    <tr>
+      <th>Property Key</th>
+      <th>Property Value(Example)</th>
+      <th>Notes</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>kylin.metadata.url</td>
+      
<td>kylin4_on_cloud@jdbc,url=jdbc:mysql://${HOSTNAME}:3306/hue,username=hive,password=${PASSWORD},maxActive=10,maxIdle=10,driverClassName=org.mariadb.jdbc.Driver</td>
+      <td>N/A</td>
+    </tr>
+    <tr>
+      <td>kylin.env.zookeeper-connect-string</td>
+      <td>${HOSTNAME}</td>
+      <td>N/A</td>
+    </tr>
+    <tr>
+      <td>kylin.engine.spark-conf.spark.driver.extraClassPath</td>
+      
<td>/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar:/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar</td>
+      <td>Copied from spark.driver.extraClassPath in 
/etc/spark/conf/spark-defaults.conf</td>
+    </tr>
+  </tbody>
+</table>
+
+<h3 id="start-kylin-and-verify-the-building-job">Start Kylin and verify the 
building job</h3>
+
+<h4 id="start-kylin">Start Kylin</h4>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span 
class="nb">cd</span> <span class="nv">$KYLIN_HOME</span>
+ln -s spark-aws spark <span class="c"># skip this step if soft link 'spark' 
exists </span>
+bin/kylin.sh restart
+</code></pre>
+</div>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/12_start_kylin_en.png" 
alt="" /></p>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/13_start_kylin_en.png" 
alt="" /></p>
+
+<h4 id="optional-replace-kylin-spark-enginejar">(Optional) Replace 
kylin-spark-engine.jar</h4>
+
+<blockquote>
+  <p>This step is only required for Kylin 4.0.1 users.</p>
+</blockquote>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span 
class="nb">cd</span> <span 
class="nv">$KYLIN_HOME</span>/tomcat/webapps/kylin/WEB-INF/lib/
+mv kylin-spark-engine-4.0.1.jar kylin-spark-engine-4.0.1.jar.bak <span 
class="c"># remove old one </span>
+cp kylin-spark-engine-4.0.0-SNAPSHOT.jar  .
+
+bin/kylin.sh restart <span class="c"># restart kylin to make new jar be 
loaded</span>
+</code></pre>
+</div>
+
+<h4 id="load-aws-glue-table-and-build">Load AWS Glue table and build</h4>
+
+<ul>
+  <li>Load AWS Glue table metadata</li>
+</ul>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/14_load_glue_meta_en.png" 
alt="" /></p>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/15_load_glue_meta_en.png" 
alt="" /></p>
+
+<ul>
+  <li>Create Model and Cube, then trigger a building job.</li>
+</ul>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/16_load_glue_meta_en.png" 
alt="" /></p>
+
+<h3 id="verify-the-query">Verify the query</h3>
+
+<p>Switch the Spark used by Kylin and restart Kylin.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span 
class="nb">cd</span> <span class="nv">$KYLIN_HOME</span>
+rm spark <span class="c"># 'spark' is a soft link that points to AWS 
Spark</span>
+ln -s spark-apache spark <span class="c"># switch from aws spark to apache 
spark</span>
+bin/kylin.sh restart
+</code></pre>
+</div>
+
+<p>Run a test query and confirm that it succeeds.</p>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/17_verify_query_en.png" 
alt="" /></p>
+
+<h2 id="discussion-and-qa">Discussion and Q&amp;A</h2>
+
+<h3 id="why-we-must-use-both-aws-spark-and-apache-spark">Why must we use both 
AWS Spark and Apache Spark?</h3>
+
+<p>AWS Spark has built-in support for AWS Glue so you will use AWS Spark when 
loading table metadata and building jobs;  Kylin 4.0.1 supports Apache Spark.  
Because the compatibility between Apache Spark and AWS Spark is not very good, 
we will use Apache Spark for cube query. To sum up, you need to switch between 
AWS Spark and Apache Spark according to your task (query task or build 
task).</p>
+
+<h3 id="why-do-users-need-to-modify-kylinsh">Why do users need to modify 
kylin.sh?</h3>
+
+<p>As Spark Driver, Kylin needs to load table metadata through <code 
class="highlighter-rouge">aws-glue-datacatalog-spark-client.jar</code>, so you 
need to modify kylin.sh and load the relevant jar into classpath of Kylin 
process.</p>
+
+<h3 id="if-i-faced-more-questions-where-should-i-asked">If I have more 
questions, where should I ask?</h3>
+
+<p>If you have any questions about using Kylin on AWS, please contact us via 
the mailing list (<a 
href="&#109;&#097;&#105;&#108;&#116;&#111;:&#117;&#115;&#101;&#114;&#064;&#107;&#121;&#108;&#105;&#110;&#046;&#097;&#112;&#097;&#099;&#104;&#101;&#046;&#111;&#114;&#103;">&#117;&#115;&#101;&#114;&#064;&#107;&#121;&#108;&#105;&#110;&#046;&#097;&#112;&#097;&#099;&#104;&#101;&#046;&#111;&#114;&#103;</a>).
 For details, see <a 
href="https://kylin.apache.org/community/">https://kylin.apache.org/community/</a>
 .</p>
+
+  </article>
+
+</div>
+
+
+
+
+
+                               </article>
+                       </div>
+               </div>          
+               <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<footer id="underfooter">
+    <div>
+        <div class="row">
+            <div class="col-md-12 widget">
+                <div class="widget-body">
+                    <div class="footer-img">
+                        <a href="http://www.apache.org">
+                            <img id="asf-logo" height="78px" alt="Apache 
Software Foundation" src="/assets/images/apache_footer.png">
+                        </a>
+                    </div>
+                    <p style="padding-top: 11px;">
+                        The contents of this website are © 2015 Apache 
Software Foundation under the terms of the 
+                        <a href="http://www.apache.org/licenses/LICENSE-2.0"> 
Apache License v2 </a>. 
+                    </p>
+                    <p style="margin-bottom: 11px;">    
+                        Apache Kylin and its logo are trademarks of the Apache 
Software Foundation.
+                    </p>
+
+                </div>
+            </div>
+        </div>
+        <!-- /row of widgets -->
+
+    </div>
+    <div></div>
+
+</footer>
+
+       <script src="/assets/js/jquery-1.9.1.min.js"></script> 
+       <script src="/assets/js/bootstrap.min.js"></script> 
+       <script src="/assets/js/main.js"></script>
+       </body>
+</html>
+
+
+
+

Modified: kylin/site/blog/index.html
URL: 
http://svn.apache.org/viewvc/kylin/site/blog/index.html?rev=1899035&r1=1899034&r2=1899035&view=diff
==============================================================================
--- kylin/site/blog/index.html (original)
+++ kylin/site/blog/index.html Fri Mar 18 14:13:30 2022
@@ -197,6 +197,16 @@ var _hmt = _hmt || [];
             
             
             <div class="col-md-6 col-lg-6 col-xs-12">
+              <a class="blog-card" 
href="/blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/">
+                <div class="blog-pic">
+                  <img width="20" src="../assets/images/icon_blog_w.png" />
+                </div>
+                <p class="blog-title">Kylin 4 now is supporting AWS Glue 
Catalog</p>
+                <p align="left" class="post-meta">posted: Mar 17, 2022</p>
+              </a>
+            </div>
+      
+            <div class="col-md-6 col-lg-6 col-xs-12">
               <a class="blog-card" 
href="/blog/2022/01/12/The-Future-Of-Kylin/">
                 <div class="blog-pic">
                   <img width="20" src="../assets/images/icon_blog_w.png" />

Modified: kylin/site/cn/blog/index.html
URL: 
http://svn.apache.org/viewvc/kylin/site/cn/blog/index.html?rev=1899035&r1=1899034&r2=1899035&view=diff
==============================================================================
--- kylin/site/cn/blog/index.html (original)
+++ kylin/site/cn/blog/index.html Fri Mar 18 14:13:30 2022
@@ -199,6 +199,16 @@ var _hmt = _hmt || [];
             
             
             <div class="col-md-6 col-lg-6 col-xs-12">
+              <a class="blog-card" 
href="/cn_blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/">
+                <div class="blog-pic">
+                  <img width="20" src="/assets/images/icon_blog_w.png" />
+                </div>
+                <p class="blog-title">安排!Kylin 4 现已支持 AWS Glue 
Catalog</p>
+                <p align="left" class="post-meta">posted: Mar 17, 2022</p>
+              </a>
+            </div>
+      
+            <div class="col-md-6 col-lg-6 col-xs-12">
               <a class="blog-card" 
href="/cn_blog/2022/01/12/The-Future-Of-Kylin/">
                 <div class="blog-pic">
                   <img width="20" src="/assets/images/icon_blog_w.png" />

Added: 
kylin/site/cn_blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/index.html
URL: 
http://svn.apache.org/viewvc/kylin/site/cn_blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/index.html?rev=1899035&view=auto
==============================================================================
--- 
kylin/site/cn_blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/index.html 
(added)
+++ 
kylin/site/cn_blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/index.html 
Fri Mar 18 14:13:30 2022
@@ -0,0 +1,638 @@
+<!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+<!doctype html>
+<html>
+       <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>Apache Kylin | 安排!Kylin 4 现已支持 AWS Glue Catalog</title>
+  <meta name="description" content="为什么在 EMR 部署 Kylin 需要支持 
Glue ?">
+  <meta name="author"      content="Apache Kylin">
+  <link rel="shortcut icon" href="fav.png" type="image/png">
+
+
+
+<link rel="stylesheet" href="/assets/css/animate.css">
+<!-- Bootstrap -->
+<link rel="stylesheet" href="/assets/css/bootstrap.min.css">
+
+<!-- Fonts -->
+<!-- <link rel="stylesheet" 
href="http://fonts.googleapis.com/css?family=Alice|Open+Sans:400,300,700"> -->
+
+<!-- Icons -->
+<link rel="stylesheet" href="/assets/css/font-awesome.min.css">
+
+  <!-- Custom styles -->
+  <link rel="stylesheet" href="/assets/css/styles.css">
+  <link rel="stylesheet" href="/assets/css/docs.css">
+  <link rel="stylesheet" href="/assets/css/pygments.css">
+
+  <link rel="canonical" 
href="http://kylin.apache.org/cn_blog/2022/03/17/kylin4-now-supporting-aws-glue-catalog/">
+  <link rel="alternate" type="application/rss+xml" title="Apache Kylin" 
href="http://kylin.apache.org/feed.xml" />
+
+<!--[if lt IE 9]> <script src="assets/js/html5shiv.js"></script> <![endif]-->
+<!-- Global site tag (gtag.js) - Google Analytics -->
+<script async 
src="https://www.googletagmanager.com/gtag/js?id=UA-120788561-1"></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments);}
+  gtag('js', new Date());
+
+  gtag('config', 'UA-120788561-1');
+</script>
+<script type="text/javascript" src="/assets/js/jquery-1.9.1.min.js"></script>
+<script type="text/javascript" src="/assets/js/nside.js"></script>
+<script type="text/javascript" src="/assets/js/nnav.js"></script>
+<script>
+var _hmt = _hmt || [];
+(function() {
+  var hm = document.createElement("script");
+  hm.src = "https://hm.baidu.com/hm.js?bdc5e03add430c0b72cc0eb91eabfa99";
+  var s = document.getElementsByTagName("script")[0]; 
+  s.parentNode.insertBefore(hm, s);
+})();
+</script>
+
+</head>
+
+       <body>
+               <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<header id="header" >
+  
+  <!-- Main Menu -->
+  <nav class="navbar navbar-default" role="navigation" id="nav-wrapper">
+    <div class="container-fluid" id="nav">
+      <!--
+      <img class="img-circle" width="40px" height="40px" id="circlelogo" 
src="/assets/images/kylin_logo.jpg">
+      -->
+      <!-- Brand and toggle get grouped for better mobile display -->
+      <div class="navbar-header">
+        <img class="navbar-logo" width="46" 
src="/assets/images/kylin_logo.png" >
+        <button type="button" class="navbar-toggle collapsed" 
data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
+          <span class="sr-only">Toggle navigation</span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+        </button>
+        <ul class="nav icon-navbar">
+            <li><a href="https://twitter.com/apachekylin" target="_blank" 
class="fa fa-twitter fa-lg"  title="Twitter: @ApacheKylin" ></a></li>
+            <li><a href="https://github.com/apache/kylin" target="_blank" 
class="fa fa-github-alt fa-lg" title="Github: apache/kylin" ></a></li>
+            <li><a href="https://www.facebook.com/kylinio" target="_blank" 
class="fa fa-facebook fa-lg" title="Facebook: kylin.io" ></a></li>
+        </ul>
+      </div>
+
+      <!-- Collect the nav links, forms, and other content for toggling -->
+      <div class="navbar-collapse collapse" id="bs-example-navbar-collapse-1">
+
+        <ul class="nav navbar-nav">
+
+          <li><a href="/">Home</a></li>
+          <li>
+            <a href="/docs" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-haspopup="true" aria-expanded="false">Docs<span 
class="caret"></span></a>
+            <ul class="dropdown-menu">
+              <li><a href="/docs/">Latest Release(Kylin 4.0.1)</a></li>
+              <li><a href="/docs31/">Kylin 3.1.3</a></li>
+              <li><a href="/docs24/">Kylin 2.4.0</a></li>
+              <li><a href="/archive/">Archive</a></li>
+            </ul>
+          </li>
+          <li><a href="/download">Download</a></li>
+          <li><a href="/community" >Community</a></li>
+          <li>
+            <a href="/development" class="dropdown-toggle" 
data-toggle="dropdown" role="button" aria-haspopup="true" 
aria-expanded="false">Development<span class="caret"></span></a>
+            <ul class="dropdown-menu">
+              <li><a href="/development40/">Kylin 4.x</a></li>
+              <li><a href="/development/">Kylin 3.x And Older Versions</a></li>
+            </ul>
+          </li>
+          <li><a href="/blog">Blog</a></li>
+          <li><a href="/cn" >中文版</a></li> 
+        </ul>     
+      </div><!-- /.navbar-collapse -->
+    </div><!-- /.container-fluid -->
+  </nav>
+
+  <div id="head" class="parallax normal-header" >
+    <div class="text-center header-apache">
+      <a href="http://apache.org/foundation/contributing.html" title="Support 
Apache" style="margin-left: 150px;">
+        <div>
+          <img src="https://www.apache.org/images/SupportApache-small.png" >
+        </div>
+      </a>
+    </div>  
+  </div>
+  
+ </header>
+
+               <div class="page-content main">
+                       <header style=" padding:2em 0 0 ">
+                       <div class="container" >
+                         <div style=" padding:0 4em">
+                <div class="blog-icon">
+                  <img width="30" src="/assets/images/icon_blog_w.png">
+                </div>
+                               <h4 class="index-title" style=" 
float:left;"><span>Apache Kylin™ Technical Blog</span></h4>
+                         </div>
+                       </div>
+               </div>
+
+               <div class="container blog">
+                       <div>
+                               <article class="post-content" > 
+                               <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<div class="post" style=" padding:2em 4em 4em 4em">
+
+  <header class="post-header">
+    <h1 class="post-title">安排!Kylin 4 现已支持 AWS Glue Catalog</h1>
+    <p class="post-meta" >Mar 17, 2022 • Xiaoxiang Yu</p>
+  </header>
+
+  <article class="post-content" >
+    <h2 id="emr--kylin--glue-">为什么在 EMR 部署 Kylin 需要支持 Glue 
?</h2>
+
+<h3 id="aws-glue">什么是 AWS Glue?</h3>
+
+<p>AWS Glue 是一项完全托管的 ETL(提取、转换和加载)服务,使 AWS 用户能够轻松而经济高效地对数据进行分类、清理和扩充,并在各种数据存储之间可靠地移动数据。AWS Glue 由一个称为 AWS Glue 数据目录的中央元数据存储库、一个自动生成代码的 ETL 引擎以及一个处理依赖项解析、作业监控和重试的灵活计划程序组成。AWS Glue 是无服务器服务,因此无需设置或管理基础设施。</p>
+
+<h3 id="kylin--aws-glue-catalog">Kylin 为什么需要支持 AWS Glue 
Catalog?</h3>
+
+<p>目前社区有很多 Kylin 用户在使用 AWS EMR,组件主要包括 Hadoop、Spark、Hive、Presto 等,如果没有配置使用 AWS Glue data Catalog,那么在各个数据仓库组件如 Hive、Spark、Presto 建的数据表,在其它组件上是找不到的,也就不能使用,公司底层的数据仓库是提供给各个业务部门来进行使用,为了解决这个问题,在创建 AWS EMR 集群时就可以使用 AWS Glue data Catalog 来存储元数据,对各个组件共享数据源,对各个业务部门进行共享数据源,将各个业务部门的数据构建成一个大的数据立方体,能够快速响应公司高速发展的业务需求。<br />
+现代公司的数据都是基于云平台搭建,大数据团队使用的 AWS EMR 来进行数据加工、数据分析、以及模型训练,随着数据暴增带来提数慢、提数难,EMR/Spark/Hive 很难满足数据分析师、运营人员、销售的快速查询数据的需求,于是一些用户选择了 Apache Kylin 作为开源 OLAP 解决方案。<br />
+但是最近社区用户联系到我们,告知 Kylin 4 还不支持从 Glue 读取表元数据,所以我们和社区用户合作一起检查这里遇到的问题并最终解决了问题,从而使得 Kylin 4 支持了 AWS Glue Catalog,这样带来的好处在于 Hive、Presto、Spark、Kylin 中可以共享表和数据,使得每个主题都串联起来形成一个大的数据分析平台,打破元数据障碍。</p>
+
+<h3 id="apache-kylin--aws-glue-">Apache Kylin 支持 AWS Glue 吗?</h3>
+
+<table>
+  <thead>
+    <tr>
+      <th> </th>
+      <th>支持 Glue 的 Kylin 版本</th>
+      <th>Issue Link</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>Kylin on HBase (Before Kylin 4)</td>
+      <td>2.6.6 or higher<br /> 3.1.0 or higher</td>
+      <td>https://issues.apache.org/jira/browse/KYLIN-4206<br 
/>https://zhuanlan.zhihu.com/p/99481373</td>
+    </tr>
+    <tr>
+      <td>Kylin on Parquet</td>
+      <td>4.0.1 or higher</td>
+      <td>本文。</td>
+    </tr>
+  </tbody>
+</table>
+
+<h2 id="section">部署前准备</h2>
+
+<h3 id="section-1">软件信息一览</h3>
+
+<table>
+  <thead>
+    <tr>
+      <th><strong>Software</strong></th>
+      <th><strong>Version</strong></th>
+      <th>Reference</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>Apache Kylin</td>
+      <td>4.0.1 or higher</td>
+      <td>必须是 4.0.1 及以上,详情参考 <a 
href="https://cwiki.apache.org/confluence/display/KYLIN/KIP+10+refactor+hive+and+hadoop+dependency">KIP
 10 refactor hive and hadoop dependency</a>.</td>
+    </tr>
+    <tr>
+      <td>AWS EMR</td>
+      <td>6.5.0 or higher<br />5.33.1 or higher</td>
+      <td>覆盖EMR 6 / EMR 5 的较新版本,<a 
href="https://docs.amazonaws.cn/en_us/emr/latest/ReleaseGuide/emr-650-release.html">Amazon
 EMR release 6.5.0 - Amazon EMR</a>.</td>
+    </tr>
+  </tbody>
+</table>
+
+<h3 id="glue-">准备 Glue 数据库和表</h3>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/1_prepare_aws_glue_table_en.png" 
alt="" /></p>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/2_prepare_aws_glue_table_en.png" 
alt="" /></p>
+
+<ul>
+  <li>创建 AWS EMR 集群。</li>
+</ul>
+
+<p>这里启动一个 EMR 的集群,需要注意的是,这里通过配置 
<code class="highlighter-rouge">hive.metastore.client.factory.class</code> 
启动了 Glue 外部元数据。以下命令可以作为参考。</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>aws emr 
create-cluster --applications <span class="nv">Name</span><span 
class="o">=</span>Hadoop <span class="nv">Name</span><span 
class="o">=</span>Hive <span class="nv">Name</span><span 
class="o">=</span>Spark <span class="nv">Name</span><span 
class="o">=</span>ZooKeeper <span class="nv">Name</span><span 
class="o">=</span>Tez <span class="nv">Name</span><span 
class="o">=</span>Ganglia <span class="se">\</span>
+  --ec2-attributes <span class="k">${}</span> <span class="se">\</span>
+  --release-label emr-6.5.0 <span class="se">\</span>
+  --log-uri <span class="k">${}</span> <span class="se">\</span>
+  --instance-groups <span class="k">${}</span> <span class="se">\</span>
+  --configurations <span 
class="s1">'[{"Classification":"hive-site","Properties":{"hive.metastore.client.factory.class":"com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"}}]'</span>
 <span class="se">\</span>
+  --auto-scaling-role EMR_AutoScaling_DefaultRole <span class="se">\</span>
+  --ebs-root-volume-size 100 <span class="se">\</span>
+  --service-role EMR_DefaultRole <span class="se">\</span>
+  --enable-debugging <span class="se">\</span>
+  --name <span class="s1">'Kylin4_on_EMR65_with_Glue'</span> <span 
class="se">\</span>
+  --region cn-northwest-1
+</code></pre>
+</div>
+
+<ul>
+  <li>登录 Master 节点,并且检查 Hadoop 版本 和 Hadoop 
集群是否启动成功。</li>
+</ul>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/3_prepare_hadoop_cluster_en.png" 
alt="" /></p>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/4_prepare_hadoop_cluster_en.png" 
alt="" /></p>
+
+<h3 id="optional">获取环境信息(Optional)</h3>
+
+<blockquote>
+  <p>如果你使用 RDS 或者其他元数据存储,请酌情
跳过此步。</p>
+</blockquote>
+
+<p>由于 Kylin 4.X 推荐使用 RDBMS 作为元数据存储,出于测试目的,这里使用 Master 节点自带的 MariaDB 作为元数据存储;关于 MariaDB 的主机名称、账号、密码等信息,可以从 <code class="highlighter-rouge">/etc/hive/conf/hive-site.xml</code> 获取。</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>kylin.metadata.url<span 
class="o">=</span>kylin4_on_cloud@jdbc,url<span 
class="o">=</span>jdbc:mysql://<span class="k">${</span><span 
class="nv">HOSTNAME</span><span class="k">}</span>:3306/hue,username<span 
class="o">=</span>hive,password<span class="o">=</span><span 
class="k">${</span><span class="nv">PASSWORD</span><span 
class="k">}</span>,maxActive<span class="o">=</span>10,maxIdle<span 
class="o">=</span>10,driverClassName<span 
class="o">=</span>org.mariadb.jdbc.Driver  
+kylin.env.zookeeper-connect-string<span class="o">=</span><span 
class="k">${</span><span class="nv">HOSTNAME</span><span class="k">}</span>
+</code></pre>
+</div>
+
+<p>获取这些信息后,并且替换以上 Kylin 配置项里面的变量,如 <code class="highlighter-rouge">${PASSWORD}</code>,保存到本地,供下一步启动 Kylin 进程使用。</p>
+
+<h3 id="spark-sql--aws-glue-">测试 Spark SQL 和 AWS Glue 的连通性</h3>
+
+<p>通过 spark-sql 来测试 AWS 的 Spark SQL 是否能够通过 Glue 
获取数据库和表的元数据,首次会发现启动报错失败。</p>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/5_test_sparksql_glue_en.png" 
alt="" /></p>
+
+<p>其通过以下命令替换 Spark 使用的 <code 
class="highlighter-rouge">hive-site.xml</code>。</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span 
class="nb">cd</span> /etc/spark/conf
+sudo mv hive-site.xml hive-site.xml.bak
+sudo cp /etc/hive/conf/hive-site.xml .
+</code></pre>
+</div>
+
+<p>并且修改 <code 
class="highlighter-rouge">/etc/spark/conf/hive-site.xml</code> 文件中 <code 
class="highlighter-rouge">hive.execution.engine</code> 的值为<code 
class="highlighter-rouge">mr</code>,再次尝试启动 Spark-SQL 
CLI,验证对 Glue 的表数据执行查询成功。</p>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/6_test_sparksql_glue_en.png" 
alt="" /></p>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/7_test_sparksql_glue_en.png" 
alt="" /></p>
+
+<h3 id="kylin-spark-enginejaroptional">准备 
kylin-spark-engine.jar(Optional)</h3>
+
+<blockquote>
+  <p>如果 Apache Kylin 4.0.2 
已经发布,那么应该已经修改该问题,可以跳过此步。否则请参考以下步骤,替换
 <code class="highlighter-rouge">kylin-spark-engine.jar</code>:</p>
+</blockquote>
+
+<p>参考下面的命令,克隆 kylin 仓库,执行 <code 
class="highlighter-rouge">mvn clean package -DskipTests</code>,获取 <code 
class="highlighter-rouge">kylin-spark-project/kylin-spark-engine/target/kylin-spark-engine-4.0.0-SNAPSHOT.jar</code>
 。</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>git clone 
https://github.com/hit-lacus/kylin.git
+<span class="nb">cd </span>kylin
+git checkout KYLIN-5160
+mvn clean package -DskipTests
+
+<span class="c"># find -name kylin-spark-engine-4.0.0-SNAPSHOT.jar 
kylin-spark-project/kylin-spark-engine/target</span>
+</code></pre>
+</div>
+
+<p>Patch link: <a 
href="https://github.com/apache/kylin/pull/1819">https://github.com/apache/kylin/pull/1819</a></p>
+
+<h2 id="kylin--glue">部署 Kylin 并连接 Glue</h2>
+
+<h3 id="kylin">下载 Kylin</h3>
+
+<ol>
+  <li>
+    <p>下载并解压 Kylin ,请根据 EMR 的版本选择对应的 Kylin 
package,具体来说,EMR 5.X 使用 spark2 的 package,EMR 6.X 使用 
spark3 的 package。<br />
+ <code class="highlighter-rouge">shell
+ # aws s3 cp s3://${BUCKET}/apache-kylin-4.0.1-bin-spark3.tar.gz .
+ # wget apache-kylin-4.0.1-bin-spark3.tar.gz
+ tar zxvf apache-kylin-4.0.1-bin-spark3.tar.gz .
+ cd apache-kylin-4.0.1-bin-spark3
+ export KYLIN_HOME=/home/hadoop/apache-kylin-4.0.1-bin-spark3
+</code></p>
+  </li>
+  <li>
+    <p>获取 RDBMS 的 驱动 jar(Optional)</p>
+
+    <blockquote>
+      <p>如果你是用别的 RDBMS 作为元数据存储,请跳过此步骤。</p>
+    </blockquote>
+
+    <p><code class="highlighter-rouge">shell
+ cd $KYLIN_HOME
+ mkdir ext
+ cp /usr/lib/hive/lib/mariadb-connector-java.jar $KYLIN_HOME/ext
+</code></p>
+  </li>
+</ol>
+
+<h3 id="spark">准备 Spark</h3>
+
+<p>由于 AWS Spark 内置对 AWS Glue 的支持,所以 <strong>加载表元数据和执行构建需要使用 AWS Spark</strong>;但是考虑到 Kylin 4.0.1 是支持 Apache Spark,并且 AWS Spark 相对 Apache Spark 有比较大的代码修改,两者兼容性较差,所以<strong>查询 Cube 需要使用 Apache Spark</strong>。综上所述,需要根据 Kylin 需要执行查询任务还是构建任务,来切换所使用的 Spark。</p>
+
+<ul>
+  <li>准备 AWS Spark</li>
+</ul>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span 
class="nb">cd</span> <span class="nv">$KYLIN_HOME</span>
+mkdir ext
+cp /usr/lib/hive/lib/mariadb-connector-java.jar <span 
class="nv">$KYLIN_HOME</span>/ext
+</code></pre>
+</div>
+
+<ul>
+  <li>准备 Apache Spark
+    <ul>
+      <li>请根据 EMR 的版本选择对应的 Spark 版本安装包,具体来说,EMR 5.X 使用 <code class="highlighter-rouge">Spark 2.4.7</code> 的 Spark 安装包,EMR 6.X 使用 <code class="highlighter-rouge">Spark 3.1.2</code> 的 Spark 安装包。<br />
+<code class="highlighter-rouge">shell
+cd $KYLIN_HOME
+aws s3 cp s3://${BUCKET}/spark-2.4.7-bin-hadoop2.7.tgz $KYLIN_HOME # Or 
download spark-2.4.7-bin-hadoop2.7.tgz from the official website
+tar zxvf spark-2.4.7-bin-hadoop2.7.tgz
+mv spark-2.4.7-bin-hadoop2.7 spark-apache
+</code></li>
+    </ul>
+  </li>
+  <li>因为要先加载 Glue 表,所以这里通过软链接将<code 
class="highlighter-rouge">$KYLIN_HOME/spark</code>指向 AWS 
Spark;请注意无需设置 <code 
class="highlighter-rouge">SPARK_HOME</code>,因为在 <code 
class="highlighter-rouge">$KYLIN_HOME/spark</code> 存在并且 <code 
class="highlighter-rouge">SPARK_HOME</code> 未设置的情况下,Kylin 
会默认使用 <code class="highlighter-rouge">$KYLIN_HOME/spark</code> 
。</li>
+</ul>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>ln -s spark-aws 
spark
+</code></pre>
+</div>
+
+<h3 id="kylin-">修改 Kylin 启动脚本</h3>
+
+<ol>
+  <li>启动 Spark SQL CLI,不退出</li>
+  <li>
+    <p>通过 <code class="highlighter-rouge">jps -ml ${PID}</code> 获取 
<code class="highlighter-rouge">SparkSQLCLIDriver</code> 的 PID,然后获取 
Driver 的 <code 
class="highlighter-rouge">spark.driver.extraClassPath</code>。或者
也可以从 <code 
class="highlighter-rouge">/etc/spark/conf/spark-defaults.conf</code> 
获取。<br />
+ <code class="highlighter-rouge">shell
+ jps -ml | grep SparkSubmit
+ jinfo ${PID} | grep "spark.driver.extraClassPath"
+</code><br />
+ <img 
src="/images/blog/kylin4_support_aws_glue/8_kylin_start_up_script_en.png" 
alt="" /></p>
+  </li>
+  <li>编辑 <code class="highlighter-rouge">bin/kylin.sh</code>,修改 
<code class="highlighter-rouge">KYLIN_TOMCAT_CLASSPATH</code> 变量,追加 
<code class="highlighter-rouge">kylin_driver_classpath</code> ;保存好 
<code class="highlighter-rouge">bin/kylin.sh</code> 后退出 Spark SQL CLI</li>
+</ol>
+
+<ul>
+  <li>修改前的 kylin.sh</li>
+</ul>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/9_kylin_start_up_script_en.png" 
alt="" /></p>
+
+<ul>
+  <li>针对 EMR 6.5.0,修改后的 kylin.sh:<code 
class="highlighter-rouge">kylin_driver_classpath</code> 放到最后。</li>
+</ul>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/10_kylin_start_up_script_en.png" 
alt="" /></p>
+
+<ul>
+  <li>针对 EMR 5.33.1,修改后的 kylin.sh:<code 
class="highlighter-rouge">kylin_driver_classpath</code> 放到 <code 
class="highlighter-rouge">$SPARK_HOME/jars</code> 之前。</li>
+</ul>
+
+<p><img 
src="/images/blog/kylin4_support_aws_glue/11_kylin_start_up_script_en.png" 
alt="" /></p>
+
+<h3 id="kylin-1">配置 Kylin</h3>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span 
class="nb">cd</span> <span class="nv">$KYLIN_HOME</span>
+vim conf/kylin.properties 
+</code></pre>
+</div>
+
+<h4 id="minimal-kylin-configuration">Minimal Kylin Configuration</h4>
+
+<table>
+  <thead>
+    <tr>
+      <th>Property Key</th>
+      <th>Property Value(Example)</th>
+      <th>Notes</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>kylin.metadata.url</td>
+      
<td>kylin4_on_cloud@jdbc,url=jdbc:mysql://${HOSTNAME}:3306/hue,username=hive,password=${PASSWORD},maxActive=10,maxIdle=10,driverClassName=org.mariadb.jdbc.Driver</td>
+      <td>N/A</td>
+    </tr>
+    <tr>
+      <td>kylin.env.zookeeper-connect-string</td>
+      <td>${HOSTNAME}</td>
+      <td>N/A</td>
+    </tr>
+    <tr>
+      <td>kylin.engine.spark-conf.spark.driver.extraClassPath</td>
+      
<td>/usr/lib/hadoop-lzo/lib/*:/usr/lib/hadoop/hadoop-aws.jar:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/goodies/lib/emr-spark-goodies.jar:/usr/share/aws/emr/security/conf:/usr/share/aws/emr/security/lib/*:/usr/share/aws/hmclient/lib/aws-glue-datacatalog-spark-client.jar:/usr/share/java/Hive-JSON-Serde/hive-openx-serde.jar:/usr/share/aws/sagemaker-spark-sdk/lib/sagemaker-spark-sdk.jar:/usr/share/aws/emr/s3select/lib/emr-s3-select-spark-connector.jar</td>
+      <td>Copied from spark.driver.extraClassPath in 
/etc/spark/conf/spark-defaults.conf</td>
+    </tr>
+  </tbody>
+</table>
+
+<h3 id="kylin--1">启动 Kylin 并验证构建</h3>
+
+<h4 id="kylin-2">启动 Kylin</h4>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span 
class="nb">cd</span> <span class="nv">$KYLIN_HOME</span>
+ln -s spark-aws spark <span class="c"># skip this step if soft link 'spark' 
exists </span>
+bin/kylin.sh restart
+</code></pre>
+</div>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/12_start_kylin_en.png" 
alt="" /></p>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/13_start_kylin_en.png" 
alt="" /></p>
+
+<h4 id="kylin-spark-enginejar-optional">替换 kylin-spark-engine.jar 
(Optional)</h4>
+
+<blockquote>
+  <p>仅对于 4.0.1 需要操作该步骤。</p>
+</blockquote>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span 
class="nb">cd</span> <span 
class="nv">$KYLIN_HOME</span>/tomcat/webapps/kylin/WEB-INF/lib/
+mv kylin-spark-engine-4.0.1.jar kylin-spark-engine-4.0.1.jar.bak <span 
class="c"># remove old one </span>
+cp kylin-spark-engine-4.0.0-SNAPSHOT.jar  .
+
+bin/kylin.sh restart <span class="c"># restart kylin to make new jar be 
loaded</span>
+</code></pre>
+</div>
+
+<h4 id="glue--1">加载 Glue 表、构建</h4>
+
+<ul>
+  <li>加载 Glue 表元数据</li>
+</ul>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/14_load_glue_meta_en.png" 
alt="" /></p>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/15_load_glue_meta_en.png" 
alt="" /></p>
+
+<ul>
+  <li>创建 Model 和 Cube,然后触发构建</li>
+</ul>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/16_load_glue_meta_en.png" 
alt="" /></p>
+
+<h3 id="section-2">验证查询</h3>
+
+<p>切换 Kylin 使用的 Spark,重启 Kylin。</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span 
class="nb">cd</span> <span class="nv">$KYLIN_HOME</span>
+rm spark <span class="c"># 'spark' is a soft link pointing to aws 
spark</span>
+ln -s spark-apache spark <span class="c"># switch from aws spark to apache 
spark</span>
+bin/kylin.sh restart
+</code></pre>
+</div>
+
+<p>执行测试查询,查询成功</p>
+
+<p><img src="/images/blog/kylin4_support_aws_glue/17_verify_query_en.png" 
alt="" /></p>
+
+<h2 id="section-3">讨论和问答</h2>
+
+<h3 id="sparkaws-spark--apache-spark">为什么必须使用两个 Spark(AWS 
Spark &amp; Apache Spark)?</h3>
+
+<p>由于 AWS Spark 内置对 AWS Glue Catalog 的支持,并且加载表和构建引擎需要获取表,所以<strong>加载表元数据和执行构建需要使用 AWS Spark</strong>;但是考虑到 Kylin 4.0.1 是支持 Apache Spark,并且 AWS Spark 相对 Apache Spark 有比较大的代码修改,造成两者兼容性较差,所以<strong>查询 Cube 需要使用 Apache Spark</strong>。综上所述,需要根据 Kylin 需要执行查询任务还是构建任务,来切换所使用的 Spark。<br />
+在实际使用过程中,可以考虑 Job Node(构建任务)使用 AWS 
Spark,Query Node(查询任务)使用 Apache Spark。</p>
+
+<h3 id="kylinsh">为什么需要修改 kylin.sh?</h3>
+
+<p>Kylin 进程作为 Spark Driver 需要通过<code 
class="highlighter-rouge">aws-glue-datacatalog-spark-client.jar</code>加
载表元数据,所以这块需要修改 kylin.sh,将相关 jar 加载到 
Kylin 进程的 classpath。</p>
+
+  </article>
+
+</div>
+
+
+
+
+
+                               </article>
+                       </div>
+               </div>          
+               <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<footer id="underfooter">
+    <div>
+        <div class="row">
+            <div class="col-md-12 widget">
+                <div class="widget-body">
+                    <div class="footer-img">
+                        <a href="http://www.apache.org">
+                            <img id="asf-logo" height="78px" alt="Apache 
Software Foundation" src="/assets/images/apache_footer.png">
+                        </a>
+                    </div>
+                    <p style="padding-top: 11px;">
+                        The contents of this website are © 2015 Apache 
Software Foundation under the terms of the 
+                        <a href="http://www.apache.org/licenses/LICENSE-2.0"> 
Apache License v2 </a>. 
+                    </p>
+                    <p style="margin-bottom: 11px;">    
+                        Apache Kylin and its logo are trademarks of the Apache 
Software Foundation.
+                    </p>
+
+                </div>
+            </div>
+        </div>
+        <!-- /row of widgets -->
+
+    </div>
+    <div></div>
+
+</footer>
+
+       <script src="/assets/js/jquery-1.9.1.min.js"></script> 
+       <script src="/assets/js/bootstrap.min.js"></script> 
+       <script src="/assets/js/main.js"></script>
+       </body>
+</html>
+
+
+
+


Reply via email to