Author: lidong
Date: Fri Oct 22 05:11:34 2021
New Revision: 1894464

URL: http://svn.apache.org/viewvc?rev=1894464&view=rev
Log:
Update kylin diagram and add new blog

Added:
    kylin/site/assets/images/Kylin_diagram.pptx   (with props)
    kylin/site/blog/2021/10/
    kylin/site/blog/2021/10/21/
    kylin/site/blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/
    
kylin/site/blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/index.html
    kylin/site/cn_blog/2021/10/
    kylin/site/cn_blog/2021/10/21/
    kylin/site/cn_blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/
    
kylin/site/cn_blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/index.html
    kylin/site/images/blog/local-cache/
    kylin/site/images/blog/local-cache/Local_cache_stage.png   (with props)
    kylin/site/images/blog/local-cache/kylin4_local_cache.png   (with props)
    kylin/site/images/blog/local-cache/local_cache_benchmark_result_ssb.png   
(with props)
    kylin/site/images/blog/local-cache/local_cache_benchmark_result_tpch1.png   
(with props)
    kylin/site/images/blog/local-cache/local_cache_benchmark_result_tpch4.png   
(with props)
Modified:
    kylin/site/assets/images/kylin_diagram.png
    kylin/site/blog/index.html
    kylin/site/cn/blog/index.html
    kylin/site/feed.xml

Added: kylin/site/assets/images/Kylin_diagram.pptx
URL: 
http://svn.apache.org/viewvc/kylin/site/assets/images/Kylin_diagram.pptx?rev=1894464&view=auto
==============================================================================
Binary file - no diff available.

Propchange: kylin/site/assets/images/Kylin_diagram.pptx
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: kylin/site/assets/images/kylin_diagram.png
URL: 
http://svn.apache.org/viewvc/kylin/site/assets/images/kylin_diagram.png?rev=1894464&r1=1894463&r2=1894464&view=diff
==============================================================================
Binary files - no diff available.

Added: 
kylin/site/blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/index.html
URL: 
http://svn.apache.org/viewvc/kylin/site/blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/index.html?rev=1894464&view=auto
==============================================================================
--- 
kylin/site/blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/index.html 
(added)
+++ 
kylin/site/blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/index.html 
Fri Oct 22 05:11:34 2021
@@ -0,0 +1,356 @@
+<!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+<!doctype html>
+<html>
+       <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>Apache Kylin | Performance optimization of Kylin 4.0 in cloud -- 
local cache and soft affinity scheduling</title>
+  <meta name="description" content="01 Background IntroductionRecently, the 
Apache Kylin community released Kylin 4.0.0 with a new architecture. The 
architecture of Kylin 4.0 supports the separ...">
+  <meta name="author"      content="Apache Kylin">
+  <link rel="shortcut icon" href="fav.png" type="image/png">
+
+
+
+<link rel="stylesheet" href="/assets/css/animate.css">
+<!-- Bootstrap -->
+<link rel="stylesheet" href="/assets/css/bootstrap.min.css">
+
+<!-- Fonts -->
+<!-- <link rel="stylesheet" 
href="http://fonts.googleapis.com/css?family=Alice|Open+Sans:400,300,700"> -->
+
+<!-- Icons -->
+<link rel="stylesheet" href="/assets/css/font-awesome.min.css">
+
+  <!-- Custom styles -->
+  <link rel="stylesheet" href="/assets/css/styles.css">
+  <link rel="stylesheet" href="/assets/css/docs.css">
+  <link rel="stylesheet" href="/assets/css/pygments.css">
+
+  <link rel="canonical" 
href="http://kylin.apache.org/blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/">
+  <link rel="alternate" type="application/rss+xml" title="Apache Kylin" 
href="http://kylin.apache.org/feed.xml" />
+
+<!--[if lt IE 9]> <script src="assets/js/html5shiv.js"></script> <![endif]-->
+<!-- Global site tag (gtag.js) - Google Analytics -->
+<script async 
src="https://www.googletagmanager.com/gtag/js?id=UA-120788561-1"></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments);}
+  gtag('js', new Date());
+
+  gtag('config', 'UA-120788561-1');
+</script>
+<script type="text/javascript" src="/assets/js/jquery-1.9.1.min.js"></script>
+<script type="text/javascript" src="/assets/js/nside.js"></script>
+<script type="text/javascript" src="/assets/js/nnav.js"></script>
+<script>
+var _hmt = _hmt || [];
+(function() {
+  var hm = document.createElement("script");
+  hm.src = "https://hm.baidu.com/hm.js?bdc5e03add430c0b72cc0eb91eabfa99";;
+  var s = document.getElementsByTagName("script")[0]; 
+  s.parentNode.insertBefore(hm, s);
+})();
+</script>
+
+</head>
+
+       <body>
+               <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<header id="header" >
+  
+  <!-- Main Menu -->
+  <nav class="navbar navbar-default" role="navigation" id="nav-wrapper">
+    <div class="container-fluid" id="nav">
+      <!--
+      <img class="img-circle" width="40px" height="40px" id="circlelogo" 
src="/assets/images/kylin_logo.jpg">
+      -->
+      <!-- Brand and toggle get grouped for better mobile display -->
+      <div class="navbar-header">
+        <img class="navbar-logo" width="46" 
src="/assets/images/kylin_logo.png" >
+        <button type="button" class="navbar-toggle collapsed" 
data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
+          <span class="sr-only">Toggle navigation</span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+        </button>
+        <ul class="nav icon-navbar">
+            <li><a href="https://twitter.com/apachekylin" target="_blank" 
class="fa fa-twitter fa-lg"  title="Twitter: @ApacheKylin" ></a></li>
+            <li><a href="https://github.com/apache/kylin" target="_blank" 
class="fa fa-github-alt fa-lg" title="Github: apache/kylin" ></a></li>
+            <li><a href="https://www.facebook.com/kylinio" target="_blank" 
class="fa fa-facebook fa-lg" title="Facebook: kylin.io" ></a></li>
+        </ul>
+      </div>
+
+      <!-- Collect the nav links, forms, and other content for toggling -->
+      <div class="navbar-collapse collapse" id="bs-example-navbar-collapse-1">
+
+        <ul class="nav navbar-nav">
+
+          <li><a href="/">Home</a></li>
+          <li>
+            <a href="/docs" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-haspopup="true" aria-expanded="false">Docs<span 
class="caret"></span></a>
+            <ul class="dropdown-menu">
+              <li><a href="/docs/">Latest Release(Kylin 4.0.0)</a></li>
+              <li><a href="/docs31/">Kylin 3.1.2</a></li>
+              <li><a href="/docs24/">Kylin 2.4.0</a></li>
+              <li><a href="/archive/">Archive</a></li>
+            </ul>
+          </li>
+          <li><a href="/download">Download</a></li>
+          <li><a href="/community" >Community</a></li>
+          <li>
+            <a href="/development" class="dropdown-toggle" 
data-toggle="dropdown" role="button" aria-haspopup="true" 
aria-expanded="false">Development<span class="caret"></span></a>
+            <ul class="dropdown-menu">
+              <li><a href="/development40/">Kylin 4.x</a></li>
+              <li><a href="/development/">Kylin 3.x And Older Versions</a></li>
+            </ul>
+          </li>
+          <li><a href="/blog">Blog</a></li>
+          <li><a href="/cn" >中文版</a></li> 
+        </ul>     
+      </div><!-- /.navbar-collapse -->
+    </div><!-- /.container-fluid -->
+  </nav>
+
+  <div id="head" class="parallax normal-header" >
+    <div class="text-center header-apache">
+      <a href="http://apache.org/foundation/contributing.html" title="Support 
Apache" style="margin-left: 150px;">
+        <div>
+          <img src="https://www.apache.org/images/SupportApache-small.png" >
+        </div>
+      </a>
+    </div>  
+  </div>
+  
+ </header>
+
+               <div class="page-content main">
+                       <header style=" padding:2em 0 0 ">
+                       <div class="container" >
+                         <div style=" padding:0 4em">
+                <div class="blog-icon">
+                  <img width="30" src="/assets/images/icon_blog_w.png">
+                </div>
+                               <h4 class="index-title" style=" 
float:left;"><span>Apache Kylin™ Technical Blog</span></h4>
+                         </div>
+                       </div>
+               </div>
+
+               <div class="container blog">
+                       <div>
+                               <article class="post-content" > 
+                               <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<div class="post" style=" padding:2em 4em 4em 4em">
+
+  <header class="post-header">
+    <h1 class="post-title">Performance optimization of Kylin 4.0 in cloud -- 
local cache and soft affinity scheduling</h1>
+    <p class="post-meta" >Oct 21, 2021 • Yaqian Zhang</p>
+  </header>
+
+  <article class="post-content" >
+    <h2 id="background-introduction">01 Background Introduction</h2>
+<p>Recently, the Apache Kylin community released Kylin 4.0.0 with a new 
architecture. The architecture of Kylin 4.0 supports the separation of storage 
and computing, which enables kylin users to run Kylin 4.0 in a more flexible 
cloud deployment mode with flexible computing resources. With the cloud 
infrastructure, users can choose to use cheap and reliable object storage to 
store cube data, such as S3. However, in the architecture of separation of 
storage and computing, we need to consider that reading data from remote 
storage by computing nodes through the network is still a costly operation, 
which often leads to performance loss.<br />
+In order to improve the query performance of Kylin 4.0 when using cloud object 
storage as the storage, we try to introduce the local cache mechanism into the 
Kylin 4.0 query engine. When executing the query, the frequently used data is 
cached on the local disk to reduce the delay caused by pulling data from the 
remote object storage and achieve faster query response. In addition, in order 
to avoid wasting disk space when the same data is cached on a large number of 
spark executors at the same time, and the computing node can read more required 
data from the local cache, we introduce the scheduling strategy of soft 
affinity. The soft affinity strategy is to establish a corresponding 
relationship between the spark executor and the data file through some method. 
In most cases, the same data can always be read on the same executor, so as to 
improve the hit rate of the cache.</p>
+
+<h2 id="implementation-principle">02 Implementation Principle</h2>
+
+<h4 id="local-cache">1. Local Cache</h4>
+
+<p>When Kylin 4.0 executes a query, it mainly goes through the following 
stages, in which the stages where local cache can be used to improve 
performance are marked with dotted lines:</p>
+
+<p><img src="/images/blog/local-cache/Local_cache_stage.png" alt="" /></p>
+
+<ul>
+  <li>File list cache: Cache the file status on the spark driver side. When 
executing the query, the spark driver needs to read the file list and obtain 
some file information for subsequent scheduling execution. Here, the file 
status information will be cached locally to avoid frequent reading of remote 
file directories.</li>
+  <li>Data cache: Cache the data on the spark executor side. You can set the 
data cache to memory or disk. If it is set to cache to memory, you need to 
appropriately increase the executor memory to ensure that the executor has 
enough memory for data cache; If it is cached to disk, you need to set the data 
cache directory, preferably SSD disk directory.</li>
+</ul>
+
+<p>Based on the above design, different types of caches are made on the driver 
side and the executor side of the query engine of kylin 4.0. The basic 
architecture is as follows:</p>
+
+<p><img src="/images/blog/local-cache/kylin4_local_cache.png" alt="" /></p>
+
+<h4 id="soft-affinity-scheduling">2. Soft Affinity Scheduling</h4>
+
+<p>When doing data cache on the executor side, if all data is cached on all 
executors, the cached data will be very large and waste a great deal of disk 
space, and it will easily cause frequent eviction of cached data. In order to 
maximize the cache hit rate of the spark executor, the spark driver needs to 
schedule the tasks of the same file to the same executor as far as possible 
when the resource conditions are met, so as to ensure that the data of the same 
file can be cached on a specific one or several executors, and the data can be 
read through the cache when it is read again.<br />
+To this end, we calculate the target executor list by calculating the hash 
according to the file name and then modulo with the executor num. The number of 
executors to cache is determined by the number of data cache replications 
configured by the user. Generally, the larger the number of cache replications, 
the higher the probability of hitting the cache. When the target executors are 
unreachable or have no resources for scheduling, the scheduler will fall back 
to the random scheduling mechanism of spark. This scheduling method is called 
soft affinity scheduling strategy. Although it cannot guarantee a 100% cache 
hit, it can effectively improve the cache hit rate and avoid a large 
amount of disk space wasted by full cache on the premise of minimizing 
performance loss.</p>
+
+<h2 id="related-configuration">03 Related Configuration</h2>
+
+<p>According to the above principles, we implemented the basic function of 
local cache + soft affinity scheduling in Kylin 4.0, and tested the query 
performance based on SSB data set and TPCH data set respectively.<br />
+Several important configuration items are listed here for users to understand. 
The actual configuration will be given in the attachment at the end:</p>
+
+<ul>
+  <li>Enable soft affinity 
scheduling: kylin.query.spark-conf.spark.kylin.soft-affinity.enabled</li>
+  <li>Enable local 
cache: kylin.query.spark-conf.spark.hadoop.spark.kylin.local-cache.enabled</li>
+  <li>The number of data cache replications, that is, how many executors cache 
the same data 
file: kylin.query.spark-conf.spark.kylin.soft-affinity.replications.num</li>
+  <li>Cache to memory or to a local directory. Set the value to buff to cache 
to memory, or to local to cache to a local directory: 
kylin.query.spark-conf.spark.hadoop.alluxio.user.client.cache.store.type</li>
+  <li>Maximum cache 
capacity: kylin.query.spark-conf.spark.hadoop.alluxio.user.client.cache.size</li>
+</ul>
+
+<h2 id="performance-benchmark">04 Performance Benchmark</h2>
+
+<p>We conducted performance tests in three scenarios under AWS EMR 
environment. With scale factor = 10, we ran a single-concurrency query test 
on the SSB dataset, and both a single-concurrency and a 4-concurrency query test on the 
TPCH dataset. S3 was configured as storage in the experimental group and the 
control group. Local cache and soft affinity scheduling were enabled in the 
experimental group, but not in the control group. In addition, we also compare 
the results of the experimental group with the results when HDFS is used as 
storage in the same environment, so that users can intuitively feel the 
optimization effect of local cache + soft affinity scheduling on deploying 
Kylin 4.0 on the cloud and using object storage as storage.</p>
+
+<p><img src="/images/blog/local-cache/local_cache_benchmark_result_ssb.png" 
alt="" /></p>
+
+<p><img src="/images/blog/local-cache/local_cache_benchmark_result_tpch1.png" 
alt="" /></p>
+
+<p><img src="/images/blog/local-cache/local_cache_benchmark_result_tpch4.png" 
alt="" /></p>
+
+<p>As can be seen from the above results:</p>
+
+<ol>
+  <li>In the single concurrency scenario of SSB data set, when S3 is used as 
storage, turning on the local cache and soft affinity scheduling can achieve 
about a threefold performance improvement, matching or even exceeding the 
performance when HDFS is used as storage.</li>
+  <li>Under TPCH data set, when S3 is used as storage, whether single 
concurrent query or multiple concurrent query, after local cache and soft 
affinity scheduling are enabled, the performance of all queries can be greatly 
improved.</li>
+</ol>
+
+<p>However, in the comparison results of Q21 under the 4 concurrent tests of 
TPCH dataset, we observed that the results of enabling local cache and soft 
affinity scheduling are lower than those when using S3 alone as storage. Here, 
it may be that the data is not read through the cache for some reason. The 
underlying reason was not analyzed further in this test; in the subsequent 
optimization process, we will gradually improve it. Moreover, because the query of 
TPCH is complex and the SQL types are different, compared with the results of 
HDFS, the performance of some SQL is improved, while the performance of some 
SQL is slightly insufficient, but generally speaking, it is very close to the 
results of HDFS as storage.<br />
+The result of this performance test is a preliminary verification of the 
performance improvement effect of local cache + soft affinity scheduling. On 
the whole, local cache + soft affinity scheduling can achieve significant 
performance improvement for both simple queries and complex queries, but there 
is a certain performance loss in the scenario of high concurrent queries.<br />
+If users use cloud object storage as Kylin 4.0 storage, they can get a good 
performance experience when local cache + soft affinity scheduling is enabled, 
which provides performance guarantee for Kylin 4.0 to use the separation 
architecture of computing and storage in the cloud.</p>
+
+<h2 id="code-implementation">05 Code Implementation</h2>
+
+<p>Since the current code implementation is still in the basic stage, there 
are still many details to be improved, such as implementing consistent hash, 
how to deal with the existing cache when the number of executors changes, so 
the author has not submitted PR to the community code base. Developers who want 
to preview in advance can view the source code through the following link:</p>
+
+<p><a 
href="https://github.com/zzcclp/kylin/commit/4e75b7fa4059dd2eaed24061fda7797fecaf2e35">The
 code implementation of local cache and soft affinity scheduling</a></p>
+
+<h2 id="related-link">06 Related Link</h2>
+
+<p>You can view the performance test result data and specific configuration 
through the link:<br />
+<a href="https://github.com/Kyligence/kylin-tpch/issues/9">The benchmark of 
Kylin4.0 with local cache and soft affinity scheduling</a></p>
+
+  </article>
+
+</div>
+
+
+
+
+
+                               </article>
+                       </div>
+               </div>          
+               <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<footer id="underfooter">
+    <div>
+        <div class="row">
+            <div class="col-md-12 widget">
+                <div class="widget-body">
+                    <div class="footer-img">
+                        <a href="http://www.apache.org">
+                            <img id="asf-logo" height="78px" alt="Apache 
Software Foundation" src="/assets/images/apache_footer.png">
+                        </a>
+                    </div>
+                    <p style="padding-top: 11px;">
+                        The contents of this website are © 2015 Apache 
Software Foundation under the terms of the 
+                        <a href="http://www.apache.org/licenses/LICENSE-2.0"> 
Apache License v2 </a>. 
+                    </p>
+                    <p style="margin-bottom: 11px;">    
+                        Apache Kylin and its logo are trademarks of the Apache 
Software Foundation.
+                    </p>
+
+                </div>
+            </div>
+        </div>
+        <!-- /row of widgets -->
+
+    </div>
+    <div></div>
+
+</footer>
+
+       <script src="/assets/js/jquery-1.9.1.min.js"></script> 
+       <script src="/assets/js/bootstrap.min.js"></script> 
+       <script src="/assets/js/main.js"></script>
+       </body>
+</html>
+
+
+
+

Modified: kylin/site/blog/index.html
URL: 
http://svn.apache.org/viewvc/kylin/site/blog/index.html?rev=1894464&r1=1894463&r2=1894464&view=diff
==============================================================================
--- kylin/site/blog/index.html (original)
+++ kylin/site/blog/index.html Fri Oct 22 05:11:34 2021
@@ -197,6 +197,16 @@ var _hmt = _hmt || [];
             
             
             <div class="col-md-6 col-lg-6 col-xs-12">
+              <a class="blog-card" 
href="/blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/">
+                <div class="blog-pic">
+                  <img width="20" src="../assets/images/icon_blog_w.png" />
+                </div>
+                <p class="blog-title">Performance optimization of Kylin 4.0 in 
cloud -- local cache and soft affinity scheduling</p>
+                <p align="left" class="post-meta">posted: Oct 21, 2021</p>
+              </a>
+            </div>
+      
+            <div class="col-md-6 col-lg-6 col-xs-12">
               <a class="blog-card" 
href="/blog/2021/08/03/How-Meituan-Dominates-Online-Shopping-with-Apache-Kylin/">
                 <div class="blog-pic">
                   <img width="20" src="../assets/images/icon_blog_w.png" />

Modified: kylin/site/cn/blog/index.html
URL: 
http://svn.apache.org/viewvc/kylin/site/cn/blog/index.html?rev=1894464&r1=1894463&r2=1894464&view=diff
==============================================================================
--- kylin/site/cn/blog/index.html (original)
+++ kylin/site/cn/blog/index.html Fri Oct 22 05:11:34 2021
@@ -199,6 +199,16 @@ var _hmt = _hmt || [];
             
             
             <div class="col-md-6 col-lg-6 col-xs-12">
+              <a class="blog-card" 
href="/cn_blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/">
+                <div class="blog-pic">
+                  <img width="20" src="/assets/images/icon_blog_w.png" />
+                </div>
+                <p class="blog-title">Kylin4 
云上性能优化:本地缓存和软亲和性调度</p>
+                <p align="left" class="post-meta">posted: Oct 21, 2021</p>
+              </a>
+            </div>
+      
+            <div class="col-md-6 col-lg-6 col-xs-12">
               <a class="blog-card" 
href="/cn_blog/2021/08/03/How-Meituan-Dominates-Online-Shopping-with-Apache-Kylin/">
                 <div class="blog-pic">
                   <img width="20" src="/assets/images/icon_blog_w.png" />

Added: 
kylin/site/cn_blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/index.html
URL: 
http://svn.apache.org/viewvc/kylin/site/cn_blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/index.html?rev=1894464&view=auto
==============================================================================
--- 
kylin/site/cn_blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/index.html
 (added)
+++ 
kylin/site/cn_blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/index.html
 Fri Oct 22 05:11:34 2021
@@ -0,0 +1,343 @@
+<!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+<!doctype html>
+<html>
+       <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<head>
+  <meta charset="utf-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+
+  <title>Apache Kylin | Kylin4 
云上性能优化:本地缓存和软亲和性调度</title>
+  <meta name="description" content="01 背景介绍日前,Apache Kylin 
社区发布了全新架构的 Kylin 4.0。Kylin 4.0 
的架构支持存储和计算分离,这使得 kylin 用户可以采取更加
灵活、计算资源可以弹性伸缩的云上部署方式来运行 Kylin 
4.0。借助云上的基础设施,用户可以选择使用便宜且可靠
的对象存储来储存 cube 数...">
+  <meta name="author"      content="Apache Kylin">
+  <link rel="shortcut icon" href="fav.png" type="image/png">
+
+
+
+<link rel="stylesheet" href="/assets/css/animate.css">
+<!-- Bootstrap -->
+<link rel="stylesheet" href="/assets/css/bootstrap.min.css">
+
+<!-- Fonts -->
+<!-- <link rel="stylesheet" 
href="http://fonts.googleapis.com/css?family=Alice|Open+Sans:400,300,700"> -->
+
+<!-- Icons -->
+<link rel="stylesheet" href="/assets/css/font-awesome.min.css">
+
+  <!-- Custom styles -->
+  <link rel="stylesheet" href="/assets/css/styles.css">
+  <link rel="stylesheet" href="/assets/css/docs.css">
+  <link rel="stylesheet" href="/assets/css/pygments.css">
+
+  <link rel="canonical" 
href="http://kylin.apache.org/cn_blog/2021/10/21/Local-Cache-and-Soft-Affinity-Scheduling/">
+  <link rel="alternate" type="application/rss+xml" title="Apache Kylin" 
href="http://kylin.apache.org/feed.xml" />
+
+<!--[if lt IE 9]> <script src="assets/js/html5shiv.js"></script> <![endif]-->
+<!-- Global site tag (gtag.js) - Google Analytics -->
+<script async 
src="https://www.googletagmanager.com/gtag/js?id=UA-120788561-1"></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments);}
+  gtag('js', new Date());
+
+  gtag('config', 'UA-120788561-1');
+</script>
+<script type="text/javascript" src="/assets/js/jquery-1.9.1.min.js"></script>
+<script type="text/javascript" src="/assets/js/nside.js"></script> </script>
+<script type="text/javascript" src="/assets/js/nnav.js"></script> </script>
+<script>
+var _hmt = _hmt || [];
+(function() {
+  var hm = document.createElement("script");
+  hm.src = "https://hm.baidu.com/hm.js?bdc5e03add430c0b72cc0eb91eabfa99";
+  var s = document.getElementsByTagName("script")[0]; 
+  s.parentNode.insertBefore(hm, s);
+})();
+</script>
+
+</head>
+
+       <body>
+               <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<header id="header" >
+  
+  <!-- Main Menu -->
+  <nav class="navbar navbar-default" role="navigation" id="nav-wrapper">
+    <div class="container-fluid" id="nav">
+      <!--
+      <img class="img-circle" width="40px" height="40px" id="circlelogo" 
src="/assets/images/kylin_logo.jpg">
+      -->
+      <!-- Brand and toggle get grouped for better mobile display -->
+      <div class="navbar-header">
+        <img class="navbar-logo" width="46" 
src="/assets/images/kylin_logo.png" ></img>
+        <button type="button" class="navbar-toggle collapsed" 
data-toggle="collapse" data-target="#bs-example-navbar-collapse-1">
+          <span class="sr-only">Toggle navigation</span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+          <span class="icon-bar"></span>
+        </button>
+        <ul class="nav icon-navbar">
+            <li><a href="https://twitter.com/apachekylin" target="_blank" 
class="fa fa-twitter fa-lg"  title="Twitter: @ApacheKylin" ></a></li>
+            <li><a href="https://github.com/apache/kylin" target="_blank" 
class="fa fa-github-alt fa-lg" title="Github: apache/kylin" ></a></li>
+            <li><a href="https://www.facebook.com/kylinio" target="_blank" 
class="fa fa-facebook fa-lg" title="Facebook: kylin.io" ></a></li>
+        </ul>
+      </div>
+
+      <!-- Collect the nav links, forms, and other content for toggling -->
+      <div class="navbar-collapse collapse" id="bs-example-navbar-collapse-1">
+
+        <ul class="nav navbar-nav">
+
+          <li><a href="/">Home</a></li>
+          <li>
+            <a href="/docs" class="dropdown-toggle" data-toggle="dropdown" 
role="button" aria-haspopup="true" aria-expanded="false">Docs<span 
class="caret"></span></a>
+            <ul class="dropdown-menu">
+              <li><a href="/docs/">Latest Release(Kylin 4.0.0)</a></li>
+              <li><a href="/docs31/">Kylin 3.1.2</a></li>
+              <li><a href="/docs24/">Kylin 2.4.0</a></li>
+              <li><a href="/archive/">Archive</a></li>
+            </ul>
+          </li>
+          <li><a href="/download">Download</a></li>
+          <li><a href="/community" >Community</a></li>
+          <li>
+            <a href="/development" class="dropdown-toggle" 
data-toggle="dropdown" role="button" aria-haspopup="true" 
aria-expanded="false">Development<span class="caret"></span></a>
+            <ul class="dropdown-menu">
+              <li><a href="/development40/">Kylin 4.x</a></li>
+              <li><a href="/development/">Kylin 3.x And Older Versions</a></li>
+            </ul>
+          </li>
+          <li><a href="/blog">Blog</a></li>
+          <li><a href="/cn" >中文版</a></li> 
+        </ul>     
+      </div><!-- /.navbar-collapse -->
+    </div><!-- /.container-fluid -->
+  </nav>
+
+  <div id="head" class="parallax normal-header" >
+    <div class="text-center header-apache">
+      <a href="http://apache.org/foundation/contributing.html" title="Support 
Apache" style="margin-left: 150px;">
+        <div>
+          <img src="https://www.apache.org/images/SupportApache-small.png" >
+        </div>
+      </a>
+    </div>  
+  </div>
+  
+ </header>
+
+               <div class="page-content main">
+                       <header style=" padding:2em 0 0 ">
+                       <div class="container" >
+                         <div style=" padding:0 4em">
+                <div class="blog-icon">
+                  <img width="30" src="/assets/images/icon_blog_w.png">
+                </div>
+                               <h4 class="index-title" style=" 
float:left;"><span>Apache Kylin™ Technical Blog</span></h4>
+                         </div>
+                       </div>
+               </div>
+
+               <div class="container blog">
+                       <div>
+                               <article class="post-content" > 
+                               <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<div class="post" style=" padding:2em 4em 4em 4em">
+
+  <header class="post-header">
+    <h1 class="post-title">Kylin4 
云上性能优化:本地缓存和软亲和性调度</h1>
+    <p class="post-meta" >Oct 21, 2021 • 张亚倩</p>
+  </header>
+
+  <article class="post-content" >
+    <h2 id="section">01 背景介绍</h2>
+<p>日前,Apache Kylin 社区发布了全新架构的 Kylin 4.0。Kylin 4.0 
的架构支持存储和计算分离,这使得 kylin 用户可以采取更加
灵活、计算资源可以弹性伸缩的云上部署方式来运行 Kylin 
4.0。借助云上的基础设施,用户可以选择使用便宜且可靠
的对象存储来储存 cube 数据,比如 S3 
等。不过在存储与计算分离的架构下,我们需要考虑到,计算节点通过网络从远端存储读取数据仍然是一个代价较大的操作,往往
会带来性能的损耗。<br />
+为了提高 Kylin 4.0 
在使用云上对象存储作为存储时的查询性能,我们尝试在 
Kylin 4.0 的查询引擎中引入本地缓存(Local 
Cache)机制,在执行查询时,将经常使用的数据缓存在本地磁盘,减小从远程对象存储中拉取数据带来的延迟,实现更快的查询响应;除此之外,为了避
免同样的数据在大量 spark executor 
上同时缓存浪费磁盘空间,并且计算节点可以更多的从本地缓存读取所需数据,我们引
入了 软
亲和性(Soft Affinity 
)的调度策略,所谓软亲和性策略,就是通过某种方法在 
spark executor 和数据文件之间建立对应关系,使得同
样的数据在大部分情况下能够总是在同一个 executor 
上面读取,从而提高缓存的命中率。</p>
+
+<h2 id="section-1">02 实现原理</h2>
+
+<h4 id="section-2">1.本地缓存</h4>
+<p>在 Kylin 4.0 执行查询时,主要经过以下几个阶段,
其中用虚线标注出了可以使用本地缓存来提升性能的阶段:</p>
+
+<p><img src="/images/blog/local-cache/Local_cache_stage.png" alt="" /></p>
+
+<ul>
+  <li>File list cache:在 spark driver 端对 file status 
进行缓存。在执行查询时,spark driver 
需要读取文件列表,获取一些文件信息进行后续的调度执行,这里会将
 file status 信息缓存到本地避免频繁读取远程文件目录。</li>
+  <li>Data cache:在 spark executor 
端对数据进行缓存。用户可以设置将数据缓存到内
存或是磁盘,若设置为缓存到内存,则需要适当调大 executor 
memory,保证 executor 有足够的内
存可以进行数据缓存;若是缓存到磁盘,需要用户设置数据缓存目录,最好设置为
 SSD 
磁盘目录。除此之外,缓存数据的最大容量、备份数量等均可由用户
配置调整。</li>
+</ul>
+
+<p>基于以上设计,在 Kylin 4.0 的查询引擎 sparder 的 driver 
端和 executor 端分别做不同类型的缓存,基本架构如下:</p>
+
+<p><img src="/images/blog/local-cache/kylin4_local_cache.png" alt="" /></p>
+
+<h4 id="section-3">2.软亲和性调度</h4>
+<p>在 executor 端做 data cache 时,如果在所有的 executor 
上都缓存
全部的数据,那么缓存数据的大小将会非常可观,极大的浪费磁盘空间,同时也容易导致缓存数据被频繁
清理。为了最大化 spark executor 的缓存命中率,spark driver 
需要将同一文件的 task 在资源条件满足的情
况下尽可能调度到同样的 executor,这
样可以保证相同文件的数据能够缓存在特定的某个或者某几个 
executor 上,再次读取时便可以通过缓存读取数据。
<br />
+为此,我们采取根据文件名计算 hash 之后再与 executors num 
取模的结果来计算目标 executor 列表,在多少个 executor 
上面做缓存由用户配置的缓存备份数量决定,一般情
况下,缓存备份数量越大,击中缓存的概率越高。当目标 
executor 均不可达或者没有资源供调度时,调度程序将回退到 
spark 
的随机调度机制上。这种调度方式便称为软亲和性调度策略,它虽然不能保证
 100% 击中缓存,但能够有效提高缓存命
中率,在尽量不损失性能的前提下避免 full cache 
浪费大量磁盘空间。</p>
+
+<h2 id="section-4">03 相关配置</h2>
+<p>根据以上原理,我们在 Kylin 4.0 
中实现了本地缓存+软亲和性调度的基础功能,并分别基于 
ssb 数据集和 tpch 数据集做了查询性能测试。<br />
+这里列出几个比较重要的配置项供用户了解,实际使用的
配置将在结尾链接中给出:<br />
+- 
是否开启软亲和性调度策略:kylin.query.spark-conf.spark.kylin.soft-affinity.enabled<br
 />
+- 
是否开启本地缓存:kylin.query.spark-conf.spark.hadoop.spark.kylin.local-cache.enabled<br
 />
+- Data cache 的备份数量,即在多少个 executor 
上对同一数据文件进行缓存:kylin.query.spark-conf.spark.kylin.soft-affinity.replications.num<br
 />
+- 缓存到内存中还是本地目录,缓存到内存设置为 
BUFF,缓存到本地设置为 
LOCAL:kylin.query.spark-conf.spark.hadoop.alluxio.user.client.cache.store.type<br
 />
+- 
最大缓存容量:kylin.query.spark-conf.spark.hadoop.alluxio.user.client.cache.size</p>
+
+<h2 id="section-5">04 性能对比</h2>
+<p>我们在 AWS EMR 环境下进行了 3 种场景的性能测试,在 scale 
factor = 10的情况下,对 ssb 数据集进行单并发查询测试、tpch 
数据集进行单并发查询以及 4 并发查询测试,实验组和对
照组均配置 s3 
作为存储,在实验组中开启本地缓存和软亲和性调度,对
照组则不开启。除此之外,我们还将实验组结果与相同环境下 
hdfs 
作为存储时的结果进行对比,以便用户可以直观的感受到 
本地缓存+软亲和性调度 对云
上部署 Kylin 4.0 
使用对象存储作为存储场景下的优化效果。</p>
+
+<p><img src="/images/blog/local-cache/local_cache_benchmark_result_ssb.png" 
alt="" /></p>
+
+<p><img src="/images/blog/local-cache/local_cache_benchmark_result_tpch1.png" 
alt="" /></p>
+
+<p><img src="/images/blog/local-cache/local_cache_benchmark_result_tpch4.png" 
alt="" /></p>
+
+<p>从以上结果可以看出:<br />
+1. 在 ssb 10 数据集单并发场景下,使用 s3 
作为存储时,开启本地缓存和软亲和性调度能够获得3倍左右的性能提升,可以达到与
 hdfs 作为存储时的相同性能甚至还有 5% 左右的提升。<br />
+2. 在 tpch 10 数据集下,使用 s3 作为存储时,无
论是单并发查询还是多并发查询,开启本地缓存和软亲和性调度后,基本在所有查询中都能够获得大
幅度的性能提升。</p>
+
+<p>不过在 tpch 10 数据集的 4 并发测试下的 Q21 
的对比结果中,我们观察到,开启本地缓存和软亲和性调度的结果反而比单独使用
 s3 作为存储时有所下降,这里可能是由于某种原因
导致没有通过缓存读取数据,深层原因
在此次测试中没有进行进一步的分析,在后续的优化过程中我们会逐步改进。由于
 tpch 的查询比较复杂且 SQL 类型各异,与 hdfs 
作为存储时的结果相比,仍然有部分 sql 的性能略有
不足,不过总体来说已经与 hdfs 的结果比较接近。<br />
+本次性能测试的结果是一次对 本地缓存+软亲和性调度 
性能提升效果的初步验证,从总体上来看,本地缓存+软亲和性调度
 无
论对于简单查询还是复杂查询都能够获得明显的性能提升,但是在高并发查询场景下存在一定的性能损失。<br
 />
+如果用户使用云上对象存储作为 Kylin 4.0 的存储,在开启 
本地缓存+ 软亲和性调度的情
况下,是可以获得很好的性能体验的,这为 Kylin 4.0 
在云上使用计算和存储分离架构提供了性能保障。</p>
+
+<h2 id="section-6">05 代码实现</h2>
+<p>由于目前的代
码实现还处于比较基础的阶段,还有许多细节需要完善,比如实现一致性哈希、当
 executor 数量发生变化时如何处理已有 cache 等,所以作者
还未向社区代码库提交 PR,想要提前预览的开发者
可以通过下面的链接查看源码:<br />
+<a 
href="https://github.com/zzcclp/kylin/commit/4e75b7fa4059dd2eaed24061fda7797fecaf2e35">Kylin4.0
 本地缓存+软亲和性调度代码实现</a></p>
+
+<h2 id="section-7">06 相关链接</h2>
+<p>通过链接可查阅性能测试结果数据和具体配置:<br />
+<a href="https://github.com/Kyligence/kylin-tpch/issues/9">Kylin4.0 
本地缓存+软亲和性调度测试</a></p>
+
+  </article>
+
+</div>
+
+
+
+
+
+                               </article>
+                       </div>
+               </div>          
+               <!--
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+-->
+
+<footer id="underfooter">
+    <div>
+        <div class="row">
+            <div class="col-md-12 widget">
+                <div class="widget-body">
+                    <div class="footer-img">
+                        <a href="http://www.apache.org">
+                            <img id="asf-logo" height="78px" alt="Apache 
Software Foundation" src="/assets/images/apache_footer.png">
+                        </a>
+                    </div>
+                    <p style="padding-top: 11px;">
+                        The contents of this website are © 2015 Apache 
Software Foundation under the terms of the 
+                        <a href="http://www.apache.org/licenses/LICENSE-2.0"> 
Apache License v2 </a>. 
+                    </p>
+                    <p style="margin-bottom: 11px;">    
+                        Apache Kylin and its logo are trademarks of the Apache 
Software Foundation.
+                    </div>
+
+                </div>
+            </div>
+        </div>
+        <!-- /row of widgets -->
+
+    </div>
+    <div></div>
+
+</footer>
+
+       <script src="/assets/js/jquery-1.9.1.min.js"></script> 
+       <script src="/assets/js/bootstrap.min.js"></script> 
+       <script src="/assets/js/main.js"></script>
+       </body>
+</html>
+
+
+
+


Reply via email to