Modified: zeppelin/site/docs/0.9.0-SNAPSHOT/search_data.json
URL: 
http://svn.apache.org/viewvc/zeppelin/site/docs/0.9.0-SNAPSHOT/search_data.json?rev=1876141&r1=1876140&r2=1876141&view=diff
==============================================================================
--- zeppelin/site/docs/0.9.0-SNAPSHOT/search_data.json (original)
+++ zeppelin/site/docs/0.9.0-SNAPSHOT/search_data.json Sun Apr  5 05:06:30 2020
@@ -1,5 +1,5 @@
 {
-  
+
 
     "/interpreter/livy.html": {
       "title": "Livy Interpreter for Apache Zeppelin",
@@ -9,8 +9,19 @@
       "excerpt": "Livy is an open source REST interface for interacting with 
Spark from anywhere. It supports executing snippets of code or programs in a 
Spark context that runs locally or in YARN."
     }
     ,
-    
-  
+
+
+
+    "/interpreter/ksql.html": {
+      "title": "KSQL Interpreter for Apache Zeppelin",
+      "content"  : "<!--Licensed under the Apache License, Version 2.0 (the 
"License");you may not use this file except in compliance with the 
License.You may obtain a copy of the License 
athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law 
or agreed to in writing, softwaredistributed under the License is distributed 
on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied.See the License for the specific language governing 
permissions andlimitations under the License.-->KSQL Interpreter for Apache 
ZeppelinOverviewKSQL is the streaming SQL engine for Apache Kafka®. It 
provides an easy-to-use yet powerful interactive SQL interface for stream 
processing on Kafka,Configuration            Property      Default      
Description                  ksql.url      http://localhost:8080      The KSQL 
Endpoint base URL      N.b. The interpreter supports all the KSQL properties, 
i.e. ksql.streams.auto.offset.
 reset.The full list of KSQL parameters is here.Using the KSQL InterpreterIn a 
paragraph, use %ksql and start your SQL query in order to start to interact 
with KSQL.Following some examples:%ksqlPRINT 
'orders';%ksqlCREATE STREAM ORDERS WITH  
(VALUE_FORMAT='AVRO',   KAFKA_TOPIC 
='orders');%ksqlSELECT *FROM ORDERSLIMIT 10",
+      "url": " /interpreter/ksql.html",
+      "group": "interpreter",
+      "excerpt": "SQL is the streaming SQL engine for Apache Kafka and 
provides an easy-to-use yet powerful interactive SQL interface for stream 
processing on Kafka."
+    }
+    ,
+
+
 
     "/interpreter/pig.html": {
       "title": "Pig Interpreter for Apache Zeppelin",
@@ -20,8 +31,8 @@
       "excerpt": "Apache Pig is a platform for analyzing large data sets that 
consists of a high-level language for expressing data analysis programs, 
coupled with infrastructure for evaluating these programs."
     }
     ,
-    
-  
+
+
 
     "/interpreter/markdown.html": {
       "title": "Markdown Interpreter for Apache Zeppelin",
@@ -31,8 +42,8 @@
       "excerpt": "Markdown is a plain text formatting syntax designed so that 
it can be converted to HTML. Apache Zeppelin uses markdown4j."
     }
     ,
-    
-  
+
+
 
     "/interpreter/submarine.html": {
       "title": "Apache Hadoop Submarine Interpreter for Apache Zeppelin",
@@ -42,8 +53,8 @@
       "excerpt": "Hadoop Submarine is the latest machine learning framework 
subproject in the Hadoop 3.1 release. It allows Hadoop to support Tensorflow, 
MXNet, Caffe, Spark, etc."
     }
     ,
-    
-  
+
+
 
     "/interpreter/mahout.html": {
       "title": "Mahout Interpreter for Apache Zeppelin",
@@ -53,30 +64,41 @@
       "excerpt": "Apache Mahout provides a unified API (the R-Like Scala DSL) 
for quickly creating machine learning algorithms on a variety of engines."
     }
     ,
-    
-  
+
+
+
+    "/interpreter/kotlin.html": {
+      "title": "Kotlin interpreter in Apache Zeppelin",
+      "content"  : "<!--Licensed under the Apache License, Version 2.0 (the 
"License");you may not use this file except in compliance with the 
License.You may obtain a copy of the License 
athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law 
or agreed to in writing, softwaredistributed under the License is distributed 
on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied.See the License for the specific language governing 
permissions andlimitations under the License.-->Kotlin interpreter for 
Apache ZeppelinOverviewKotlin is a cross-platform, statically typed, 
general-purpose programming language with type inference.It is designed to 
interoperate fully with Java, and the JVM version of its standard library 
depends on the Java Class Library, but type inference allows its syntax to be 
more concise.Configuration                Name        Default        
Description                        zeppelin.kot
 lin.maxResult        1000        Max n                    
zeppelin.kotlin.shortenTypes        true        Display shortened types instead 
of full, e.g. Int vs kotlin.Int        Example%kotlin fun square(n: Int): Int = 
n * nKotlin ContextKotlin context is accessible via kc object bound to the 
interpreter. It holds vars and functions fields that return all user-defined 
variables and functions present in the interpreter.You can also print variables 
or functions by calling kc.showVars() or kc.showFunctions().Examplefun 
square(n: Int): Int = n * nval greeter = { s: String -> 
println("Hello $s!") }val l = 
listOf("Drive", "to", 
"develop")kc.showVars()kc.showFunctions()Output:l: 
List<String> = [Drive, to, develop]greeter: (String) -> 
Unit = (kotlin.String) -> kotlin.Unitfun square(Int): Int",
+      "url": " /interpreter/kotlin.html",
+      "group": "interpreter",
+      "excerpt": "Kotlin is a cross-platform, statically typed, 
general-purpose programming language with type inference."
+    }
+    ,
+
+
 
     "/interpreter/spark.html": {
       "title": "Apache Spark Interpreter for Apache Zeppelin",
-      "content"  : "<!--Licensed under the Apache License, Version 2.0 (the 
"License");you may not use this file except in compliance with the 
License.You may obtain a copy of the License 
athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law 
or agreed to in writing, softwaredistributed under the License is distributed 
on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied.See the License for the specific language governing 
permissions andlimitations under the License.-->Spark Interpreter for Apache 
ZeppelinOverviewApache Spark is a fast and general-purpose cluster computing 
system.It provides high-level APIs in Java, Scala, Python and R, and an 
optimized engine that supports general execution graphs.Apache Spark is 
supported in Zeppelin with Spark interpreter group which consists of below five 
interpreters.      Name    Class    Description        %spark    
SparkInterpreter    Creates a SparkConte
 xt and provides a Scala environment        %spark.pyspark    
PySparkInterpreter    Provides a Python environment        %spark.r    
SparkRInterpreter    Provides an R environment with SparkR support        
%spark.sql    SparkSQLInterpreter    Provides a SQL environment        
%spark.dep    DepInterpreter    Dependency loader  ConfigurationThe Spark 
interpreter can be configured with properties provided by Zeppelin.You can also 
set other Spark properties which are not listed in the table. For a list of 
additional properties, refer to Spark Available Properties.      Property    
Default    Description        args        Spark commandline args      master    
local[*]    Spark master uri.  ex) spark://masterhost:7077      spark.app.name  
  Zeppelin    The name of spark application.        spark.cores.max        
Total number of cores to use.  Empty value uses all available core.        
spark.executor.memory     1g    Executor memory per worker instance.  ex) 512m, 
32g        zeppelin.dep
 .additionalRemoteRepository    spark-packages,  
http://dl.bintray.com/spark-packages/maven,  false;    A list of 
id,remote-repository-URL,is-snapshot;  for each remote repository.        
zeppelin.dep.localrepo    local-repo    Local repository for dependency loader  
      PYSPARK_PYTHON    python    Python binary executable to use for PySpark 
in both driver and workers (default is python).            Property 
spark.pyspark.python take precedence if it is set        PYSPARK_DRIVER_PYTHON  
  python    Python binary executable to use for PySpark in driver only (default 
is PYSPARK_PYTHON).            Property spark.pyspark.driver.python take 
precedence if it is set        zeppelin.spark.concurrentSQL    false    Execute 
multiple SQL concurrently if set true.        zeppelin.spark.concurrentSQL.max  
  10    Max number of SQL concurrently executed        zeppelin.spark.maxResult 
   1000    Max number of Spark SQL result to display.        
zeppelin.spark.printREPLOutput    true    Print RE
 PL output        zeppelin.spark.useHiveContext    true    Use HiveContext 
instead of SQLContext if it is true.        zeppelin.spark.importImplicit    
true    Import implicits, UDF collection, and sql if set true.        
zeppelin.spark.enableSupportedVersionCheck    true    Do not change - developer 
only setting, not for production use        zeppelin.spark.sql.interpolation    
false    Enable ZeppelinContext variable interpolation into paragraph text      
zeppelin.spark.uiWebUrl        Overrides Spark UI default URL. Value should be 
a full URL (ex: http://{hostName}/{uniquePath}    zeppelin.spark.scala.color    
true    Whether to enable color output of spark scala interpreter  Without any 
configuration, Spark interpreter works out of box in local mode. But if you 
want to connect to your Spark cluster, you'll need to follow below two 
simple steps.1. Export SPARK_HOMEIn conf/zeppelin-env.sh, export SPARK_HOME 
environment variable with your Spark installation path.For example,
 export SPARK_HOME=/usr/lib/sparkYou can optionally set more environment 
variables# set hadoop conf direxport HADOOP_CONF_DIR=/usr/lib/hadoop# set 
options to pass spark-submit commandexport 
SPARK_SUBMIT_OPTIONS="--packages 
com.databricks:spark-csv_2.10:1.2.0"# extra classpath. e.g. set 
classpath for hive-site.xmlexport 
ZEPPELIN_INTP_CLASSPATH_OVERRIDES=/etc/hive/confFor Windows, ensure you have 
winutils.exe in %HADOOP_HOME%bin. Please see Problems running Hadoop on Windows 
for the details.2. Set master in Interpreter menuAfter start Zeppelin, go to 
Interpreter menu and edit master property in your Spark interpreter setting. 
The value may vary depending on your Spark cluster deployment type.For 
example,local[*] in local modespark://master:7077 in standalone 
clusteryarn-client in Yarn client modeyarn-cluster in Yarn cluster 
modemesos://host:5050 in Mesos clusterThat's it. Zeppelin will work 
with any version of Spark and any deployment type without rebuilding Z
 eppelin in this way.For the further information about Spark & Zeppelin 
version compatibility, please refer to "Available 
Interpreters" section in Zeppelin download page.Note that without 
exporting SPARK_HOME, it's running in local mode with included version 
of Spark. The included version may vary depending on the build profile.3. Yarn 
modeZeppelin support both yarn client and yarn cluster mode (yarn cluster mode 
is supported from 0.8.0). For yarn mode, you must specify SPARK_HOME & 
HADOOP_CONF_DIR.You can either specify them in zeppelin-env.sh, or in 
interpreter setting page. Specifying them in zeppelin-env.sh means you can use 
only one version of spark & hadoop. Specifying themin interpreter 
setting page means you can use multiple versions of spark & hadoop in 
one zeppelin instance.4. New Version of SparkInterpreterStarting from 0.9, we 
totally removed the old spark interpreter implementation, and make the new 
spark interpre
 ter as the official spark interpreter.SparkContext, SQLContext, SparkSession, 
ZeppelinContextSparkContext, SQLContext and ZeppelinContext are automatically 
created and exposed as variable names sc, sqlContext and z, respectively, in 
Scala, Python and R environments.Staring from 0.6.1 SparkSession is available 
as variable spark when you are using Spark 2.x.Note that Scala/Python/R 
environment shares the same SparkContext, SQLContext and ZeppelinContext 
instance. How to pass property to SparkConfThere're 2 kinds of 
properties that would be passed to SparkConfStandard spark property (prefix 
with spark.). e.g. spark.executor.memory will be passed to 
SparkConfNon-standard spark property (prefix with zeppelin.spark.).  e.g. 
zeppelin.spark.property_1, property_1 will be passed to SparkConfDependency 
ManagementFor spark interpreter, you should not use Zeppelin's 
Dependency Management for managing third party dependencies, (%spark.dep also 
is not the recommended approach star
 ting from Zeppelin 0.8). Instead you should set spark properties (spark.jars, 
spark.files, spark.jars.packages) in 2 ways.      spark-defaults.conf    
SPARK_SUBMIT_OPTIONS    Description        spark.jars    --jars    
Comma-separated list of local jars to include on the driver and executor 
classpaths.        spark.jars.packages    --packages    Comma-separated list of 
maven coordinates of jars to include on the driver and executor classpaths. 
Will search the local maven repo, then maven central and any additional remote 
repositories given by --repositories. The format for the coordinates should be 
groupId:artifactId:version.        spark.files    --files    Comma-separated 
list of files to be placed in the working directory of each executor.  1. Set 
spark properties in zeppelin side.In zeppelin side, you can either set them in 
spark interpreter setting page or via Generic ConfInterpreter.It is not 
recommended to set them in SPARK_SUBMIT_OPTIONS. Because it will be shared by 
all spar
 k interpreters, you can not set different dependencies for different users.2. 
Set spark properties in spark side.In spark side, you can set them in 
spark-defaults.conf.e.g.    spark.jars        /path/mylib1.jar,/path/mylib2.jar 
   spark.jars.packages   com.databricks:spark-csv_2.10:1.2.0    spark.files     
  /path/mylib1.py,/path/mylib2.egg,/path/mylib3.zipZeppelinContextZeppelin 
automatically injects ZeppelinContext as variable z in your Scala/Python 
environment. ZeppelinContext provides some additional functions and 
utilities.See Zeppelin-Context for more details.Matplotlib Integration 
(pyspark)Both the python and pyspark interpreters have built-in support for 
inline visualization using matplotlib,a popular plotting library for python. 
More details can be found in the python interpreter documentation,since 
matplotlib support is identical. More advanced interactive plotting can be done 
with pyspark throughutilizing Zeppelin's built-in Angular Display 
System, as shown below:
 Running spark sql concurrentlyBy default, each sql statement would run 
sequentially in %spark.sql. But you can run them concurrently by following 
setup.set zeppelin.spark.concurrentSQL to true to enable the sql concurrent 
feature, underneath zeppelin will change to use fairscheduler for spark. And 
also set zeppelin.spark.concurrentSQL.max to control the max number of sql 
statements running concurrently.configure pools by creating fairscheduler.xml 
under your SPARK_CONF_DIR, check the offical spark doc Configuring Pool 
Propertiesset pool property via setting paragraph property. 
e.g.%spark(pool=pool1)sql statementThis feature is available for both all 
versions of scala spark, pyspark. For sparkr, it is only available starting 
from 2.3.0.Interpreter setting optionYou can choose one of shared, scoped and 
isolated options wheh you configure Spark interpreter.Spark interpreter creates 
separated Scala compiler per each notebook but share a single SparkContext in 
scoped mode (experimental).
 It creates separated SparkContext per each notebook in isolated mode.IPython 
supportBy default, zeppelin would use IPython in pyspark when IPython is 
available, Otherwise it would fall back to the original PySpark 
implementation.If you don't want to use IPython, then you can set 
zeppelin.pyspark.useIPython as false in interpreter setting. For the IPython 
features, you can refer docPython InterpreterSetting up Zeppelin with 
KerberosLogical setup with Zeppelin, Kerberos Key Distribution Center (KDC), 
and Spark on YARN:Deprecate Spark 2.2 and earlier versionsStarting from 0.9, 
Zeppelin deprecate Spark 2.2 and earlier versions. So you will see a warning 
message when you use Spark 2.2 and earlier.You can get rid of this message by 
setting zeppelin.spark.deprecatedMsg.show to false.Configuration SetupOn the 
server that Zeppelin is installed, install Kerberos client modules and 
configuration, krb5.conf.This is to make the server communicate with KDC.Set 
SPARK_HOME in [ZEPPELIN_HOME
 ]/conf/zeppelin-env.sh to use spark-submit(Additionally, you might have to set 
export HADOOP_CONF_DIR=/etc/hadoop/conf)Add the two properties below to Spark 
configuration 
([SPARK_HOME]/conf/spark-defaults.conf):spark.yarn.principalspark.yarn.keytabNOTE:
 If you do not have permission to access for the above spark-defaults.conf 
file, optionally, you can add the above lines to the Spark Interpreter setting 
through the Interpreter tab in the Zeppelin UI.That's it. Play with 
Zeppelin!",
+      "content"  : "<!--Licensed under the Apache License, Version 2.0 (the 
"License");you may not use this file except in compliance with the 
License.You may obtain a copy of the License 
athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law 
or agreed to in writing, softwaredistributed under the License is distributed 
on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied.See the License for the specific language governing 
permissions andlimitations under the License.-->Spark Interpreter for Apache 
ZeppelinOverviewApache Spark is a fast and general-purpose cluster computing 
system.It provides high-level APIs in Java, Scala, Python and R, and an 
optimized engine that supports general execution graphs.Apache Spark is 
supported in Zeppelin with Spark interpreter group which consists of below six 
interpreters.      Name    Class    Description        %spark    
SparkInterpreter    Creates a SparkContex
 t/SparkSession and provides a Scala environment        %spark.pyspark    
PySparkInterpreter    Provides a Python environment        %spark.ipyspark    
IPySparkInterpreter    Provides an IPython environment        %spark.r    
SparkRInterpreter    Provides an R environment with SparkR support        
%spark.sql    SparkSQLInterpreter    Provides a SQL environment        
%spark.kotlin    KotlinSparkInterpreter    Provides a Kotlin environment  
ConfigurationThe Spark interpreter can be configured with properties provided 
by Zeppelin.You can also set other Spark properties which are not listed in the 
table. For a list of additional properties, refer to Spark Available 
Properties.      Property    Default    Description        SPARK_HOME        
Location of spark distribution        master    local[*]    Spark master uri.  
e.g. spark://masterhost:7077      spark.app.name    Zeppelin    The name of 
spark application.        spark.driver.cores    1    Number of cores to use for 
the driver proc
 ess, only in cluster mode.        spark.driver.memory    1g    Amount of 
memory to use for the driver process, i.e. where SparkContext is initialized, 
in the same format as JVM memory strings with a size unit suffix 
("k", "m", "g" or 
"t") (e.g. 512m, 2g).        spark.executor.cores    1    The 
number of cores to use on each executor        spark.executor.memory    1g    
Executor memory per worker instance.  e.g. 512m, 32g        spark.files        
Comma-separated list of files to be placed in the working directory of each 
executor. Globs are allowed.        spark.jars        Comma-separated list of 
jars to include on the driver and executor classpaths. Globs are allowed.       
 spark.jars.packages        Comma-separated list of Maven coordinates of jars 
to include on the driver and executor classpaths. The coordinates should be 
groupId:artifactId:version. If spark.jars.ivySettings is given artifacts will 
be res
 olved according to the configuration in the file, otherwise artifacts will be 
searched for in the local maven repo, then maven central and finally any 
additional remote repositories given by the command-line option --repositories. 
       PYSPARK_PYTHON    python    Python binary executable to use for PySpark 
in both driver and executors (default is python).            Property 
spark.pyspark.python take precedence if it is set        PYSPARK_DRIVER_PYTHON  
  python    Python binary executable to use for PySpark in driver only (default 
is PYSPARK_PYTHON).            Property spark.pyspark.driver.python take 
precedence if it is set        zeppelin.pyspark.useIPython    false    Whether 
use IPython when the ipython prerequisites are met in %spark.pyspark        
zeppelin.R.cmd    R    R binary executable path.        
zeppelin.spark.concurrentSQL    false    Execute multiple SQL concurrently if 
set true.        zeppelin.spark.concurrentSQL.max    10    Max number of SQL 
concurrently execu
 ted        zeppelin.spark.maxResult    1000    Max number rows of Spark SQL 
result to display.        zeppelin.spark.printREPLOutput    true    Print scala 
REPL output        zeppelin.spark.useHiveContext    true    Use HiveContext 
instead of SQLContext if it is true. Enable hive for SparkSession        
zeppelin.spark.enableSupportedVersionCheck    true    Do not change - developer 
only setting, not for production use        zeppelin.spark.sql.interpolation    
false    Enable ZeppelinContext variable interpolation into spark sql      
zeppelin.spark.uiWebUrl        Overrides Spark UI default URL. Value should be 
a full URL (ex: http://{hostName}/{uniquePath}    spark.webui.yarn.useProxy    
false    whether use yarn proxy url as spark weburl, e.g. 
http://localhost:8088/proxy/application1583396598068_0004  Without any 
configuration, Spark interpreter works out of box in local mode. But if you 
want to connect to your Spark cluster, you'll need to follow below two 
simple steps.Ex
 port SPARK_HOMEThere are several options for setting SPARK_HOME.Set SPARK_HOME 
in zeppelin-env.shSet SPARK_HOME in Interpreter setting pageSet SPARK_HOME via 
inline generic configuration 1. Set SPARK_HOME in zeppelin-env.shIf you work 
with only one version of spark, then you can set SPARK_HOME in zeppelin-env.sh 
because any setting in zeppelin-env.sh is globally applied.e.g. export 
SPARK_HOME=/usr/lib/sparkYou can optionally set more environment variables in 
zeppelin-env.sh# set hadoop conf direxport HADOOP_CONF_DIR=/usr/lib/hadoop2. 
Set SPARK_HOME in Interpreter setting pageIf you want to use multiple versions 
of spark, then you need create multiple spark interpreters and set SPARK_HOME 
for each of them. e.g.Create a new spark interpreter spark24 for spark 2.4 and 
set SPARK_HOME in interpreter setting pageCreate a new spark interpreter 
spark16 for spark 1.6 and set SPARK_HOME in interpreter setting page3. Set 
SPARK_HOME via inline generic configurationBesides setting SPARK_HOME in 
 interpreter setting page, you can also use inline generic configuration to put 
the configuration with code together for more flexibility. e.g.Set master in 
Interpreter menuAfter starting Zeppelin, go to Interpreter menu and edit master 
property in your Spark interpreter setting. The value may vary depending on 
your Spark cluster deployment type.For example,local[*] in local 
modespark://master:7077 in standalone clusteryarn-client in Yarn client 
modeyarn-cluster in Yarn cluster modemesos://host:5050 in Mesos 
clusterThat's it. Zeppelin will work with any version of Spark and any 
deployment type without rebuilding Zeppelin in this way.For the further 
information about Spark & Zeppelin version compatibility, please refer 
to "Available Interpreters" section in Zeppelin download 
page.Note that without exporting SPARK_HOME, it's running in local mode 
with included version of Spark. The included version may vary depending on the 
build profile.SparkC
 ontext, SQLContext, SparkSession, ZeppelinContextSparkContext, SQLContext, 
SparkSession (for spark 2.x) and ZeppelinContext are automatically created and 
exposed as variable names sc, sqlContext, spark and z, respectively, in Scala, 
Kotlin, Python and R environments.Note that Scala/Python/R environment shares 
the same SparkContext, SQLContext, SparkSession and ZeppelinContext 
instance.YARN ModeZeppelin supports both yarn client and yarn cluster mode (yarn 
cluster mode is supported from 0.8.0). For yarn mode, you must specify 
SPARK_HOME & HADOOP_CONF_DIR. Usually you only have one hadoop cluster, 
so you can set HADOOP_CONF_DIR in zeppelin-env.sh which is applied to all spark 
interpreters. If you want to use spark against multiple hadoop clusters, then 
you need to defineHADOOP_CONF_DIR in interpreter setting or via inline generic 
configuration.Dependency ManagementFor spark interpreter, it is not recommended 
to use Zeppelin's Dependency Management for managing third part
 y dependencies (%spark.dep is removed from Zeppelin 0.9 as well). Instead you 
should set the standard Spark properties.      Spark Property    Spark Submit 
Argument    Description        spark.files    --files    Comma-separated list 
of files to be placed in the working directory of each executor. Globs are 
allowed.        spark.jars    --jars    Comma-separated list of jars to include 
on the driver and executor classpaths. Globs are allowed.        
spark.jars.packages    --packages    Comma-separated list of Maven coordinates 
of jars to include on the driver and executor classpaths. The coordinates 
should be groupId:artifactId:version. If spark.jars.ivySettings is given 
artifacts will be resolved according to the configuration in the file, 
otherwise artifacts will be searched for in the local maven repo, then maven 
central and finally any additional remote repositories given by the 
command-line option --repositories.  You can either set Spark properties in 
interpreter setting page 
 or set Spark submit arguments in zeppelin-env.sh via environment variable 
SPARK_SUBMIT_OPTIONS. For examples:export 
SPARK_SUBMIT_OPTIONS="--files <my_file> --jars 
<my_jar> --packages <my_package>"But it is 
not recommended to set them in SPARK_SUBMIT_OPTIONS. Because it will be shared 
by all spark interpreters, which means you can not set different dependencies 
for different users.PySparkThere're 2 ways to use PySpark in 
Zeppelin:Vanilla PySparkIPySparkVanilla PySpark (Not Recommended)Vanilla 
PySpark interpreter is almost the same as vanilla Python interpreter except 
Zeppelin inject SparkContext, SQLContext, SparkSession via variables sc, 
sqlContext, spark.By default, Zeppelin would use IPython in %spark.pyspark when 
IPython is available, Otherwise it would fall back to the original PySpark 
implementation.If you don't want to use IPython, then you can set 
zeppelin.pyspark.useIPython as false in interpreter se
 tting. For the IPython features, you can refer docPython InterpreterIPySpark 
(Recommended)You can use IPySpark explicitly via %spark.ipyspark. IPySpark 
interpreter is almost the same as IPython interpreter except Zeppelin inject 
SparkContext, SQLContext, SparkSession via variables sc, sqlContext, spark.For 
the IPython features, you can refer doc Python InterpreterSparkRZeppelin 
supports SparkR via %spark.r. Here&#39;s the configuration for the SparkR 
Interpreter.      Spark Property    Default    Description        
zeppelin.R.cmd    R    R binary executable path.        zeppelin.R.knitr    
true    Whether use knitr or not. (It is recommended to install knitr and use 
it in Zeppelin)        zeppelin.R.image.width    100%    R plotting image 
width.        zeppelin.R.render.options    out.format = 'html', comment 
= NA, echo = FALSE, results = 'asis', message = F, warning = F, 
fig.retina = 2    R plotting options.  SparkSqlSpark Sql Interpreter share the 
same SparkContext/Spar
 kSession with other Spark interpreter. That means any table registered in 
scala, python or r code can be accessed by Spark Sql.For examples:%sparkcase 
class People(name: String, age: Int)var df = 
spark.createDataFrame(List(People("jeff", 23), 
People("andy", 
20)))df.createOrReplaceTempView("people")%spark.sqlselect * 
from peopleBy default, each sql statement would run sequentially in %spark.sql. 
But you can run them concurrently by following setup.Set 
zeppelin.spark.concurrentSQL to true to enable the sql concurrent feature, 
underneath zeppelin will change to use fairscheduler for spark. And also set 
zeppelin.spark.concurrentSQL.max to control the max number of sql statements 
running concurrently.Configure pools by creating fairscheduler.xml under your 
SPARK_CONF_DIR, check the official spark doc Configuring Pool PropertiesSet 
pool property via setting paragraph property. e.g.%spark(pool=pool1)sql 
statementThis pool feature is als
 o available for all versions of scala Spark, PySpark. For SparkR, it is only 
available starting from 2.3.0.Interpreter Setting OptionYou can choose one of 
shared, scoped and isolated options when you configure Spark interpreter.e.g. 
In scoped per user mode, Zeppelin creates separated Scala compiler for each 
user but share a single SparkContext.In isolated per user mode, Zeppelin 
creates separated SparkContext for each user.ZeppelinContextZeppelin 
automatically injects ZeppelinContext as variable z in your Scala/Python 
environment. ZeppelinContext provides some additional functions and 
utilities.See Zeppelin-Context for more details.User ImpersonationIn yarn mode, 
the user who launches the zeppelin server will be used to launch the spark yarn 
application. This is not a good practice.Most of the time, you will enable shiro in 
Zeppelin and would like to use the login user to submit the spark yarn app. For 
this purpose,you need to enable user impersonation for more security control. 
In order 
 to enable user impersonation, you need to do the following stepsStep 1 Enable 
user impersonation setting hadoop's core-site.xml. E.g. if you are 
using user zeppelin to launch Zeppelin, then add the following to 
core-site.xml, then restart both hdfs and yarn. <property>  
<name>hadoop.proxyuser.zeppelin.groups</name>  
<value>*</value></property><property>
  <name>hadoop.proxyuser.zeppelin.hosts</name>  
<value>*</value></property>Step 2 
Enable interpreter user impersonation in Spark interpreter's 
interpreter setting. (Enable shiro first of course)Step 3(Optional) If you are 
using kerberos cluster, then you need to set zeppelin.server.kerberos.keytab 
and zeppelin.server.kerberos.principal to the user(aka. user in Step 1) you 
want to impersonate in zeppelin-site.xml.Setting up Zeppelin with 
KerberosLogical 
 setup with Zeppelin, Kerberos Key Distribution Center (KDC), and Spark on 
YARN:Deprecate Spark 2.2 and earlier versionsStarting from 0.9, Zeppelin 
deprecate Spark 2.2 and earlier versions. So you will see a warning message 
when you use Spark 2.2 and earlier.You can get rid of this message by setting 
zeppelin.spark.deprecatedMsg.show to false.Configuration SetupOn the server 
that Zeppelin is installed, install Kerberos client modules and configuration, 
krb5.conf.This is to make the server communicate with KDC.Add the two 
properties below to Spark configuration 
([SPARK_HOME]/conf/spark-defaults.conf):spark.yarn.principalspark.yarn.keytabNOTE:
 If you do not have permission to access for the above spark-defaults.conf 
file, optionally, you can add the above lines to the Spark Interpreter setting 
through the Interpreter tab in the Zeppelin UI.That's it. Play with 
Zeppelin!",
       "url": " /interpreter/spark.html",
       "group": "interpreter",
       "excerpt": "Apache Spark is a fast and general-purpose cluster computing 
system. It provides high-level APIs in Java, Scala, Python and R, and an 
optimized engine that supports general execution graphs."
     }
     ,
-    
-  
+
+
 
     "/interpreter/python.html": {
       "title": "Python 2 & 3 Interpreter for Apache Zeppelin",
-      "content"  : "<!--Licensed under the Apache License, Version 2.0 (the 
"License");you may not use this file except in compliance with the 
License.You may obtain a copy of the License 
athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law 
or agreed to in writing, softwaredistributed under the License is distributed 
on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied.See the License for the specific language governing 
permissions andlimitations under the License.-->Python 2 & 3 
Interpreter for Apache ZeppelinConfiguration      Property    Default    
Description        zeppelin.python    python    Path of the already installed 
Python binary (could be python2 or python3).    If python is not in your $PATH 
you can set the absolute directory (example : /usr/bin/python)            
zeppelin.python.maxResult    1000    Max number of dataframe rows to display.  
Enabling Python InterpreterIn a
  notebook, to enable the Python interpreter, click on the Gear icon and select 
PythonUsing the Python InterpreterIn a paragraph, use %python to select the 
Python interpreter and then input all commands.The interpreter can only work if 
you already have python installed (the interpreter doesn't bring it own 
python binaries).To access the help, type help()Python environmentsDefaultBy 
default, PythonInterpreter will use python command defined in zeppelin.python 
property to run python process.The interpreter can use all modules already 
installed (with pip, easy_install...)CondaConda is an package management system 
and environment management system for python.%python.conda interpreter lets you 
change between environments.Usageget the Conda Infomation: %python.conda 
infolist the Conda environments: %python.conda env listcreate a conda 
enviornment: %python.conda create --name [ENV NAME]activate an environment 
(python interpreter will be restarted): %python.conda activate [ENV NAME]d
 eactivate%python.conda deactivateget installed package list inside the current 
environment%python.conda listinstall package%python.conda install [PACKAGE 
NAME]uninstall package%python.conda uninstall [PACKAGE 
NAME]Docker%python.docker interpreter allows PythonInterpreter creates python 
process in a specified docker container.Usageactivate an 
environment%python.docker activate [Repository]%python.docker activate 
[Repository:Tag]%python.docker activate [Image Id]deactivate%python.docker 
deactivateHere is an example# activate latest tensorflow image as a python 
environment%python.docker activate gcr.io/tensorflow/tensorflow:latestUsing 
Zeppelin Dynamic FormsYou can leverage Zeppelin Dynamic Form inside your Python 
code.Zeppelin Dynamic Form can only be used if py4j Python library is installed 
in your system. If not, you can install it with pip install py4j.Example : 
%python### Input formprint 
(z.input("f1","defaultValue"))### Select 
formprint (z.sele
 
ct("f1",[("o1","1"),("o2","2")],"2"))###
 Checkbox 
formprint("".join(z.checkbox("f3", 
[("o1","1"), 
("o2","2")],["1"])))Matplotlib
 integrationThe python interpreter can display matplotlib figures inline 
automatically using the pyplot module:%pythonimport matplotlib.pyplot as 
pltplt.plot([1, 2, 3])This is the recommended method for using matplotlib from 
within a Zeppelin notebook. The output of this command will by default be 
converted to HTML by implicitly making use of the %html magic. Additional 
configuration can be achieved using the builtin z.configure_mpl() method. For 
example, z.configure_mpl(width=400, height=300, 
fmt='svg')plt.plot([1, 2, 3])Will produce a 400x300 image in 
SVG format, which by default are normally 600x400 and PNG r
 espectively. In the future, another option called angular can be used to make 
it possible to update a plot produced from one paragraph directly from another 
(the output will be %angular instead of %html). However, this feature is 
already available in the pyspark interpreter. More details can be found in the 
included "Zeppelin Tutorial: Python - matplotlib basic" 
tutorial notebook. If Zeppelin cannot find the matplotlib backend files (which 
should usually be found in $ZEPPELIN_HOME/interpreter/lib/python) in your 
PYTHONPATH, then the backend will automatically be set to agg, and the 
(otherwise deprecated) instructions below can be used for more limited inline 
plotting.If you are unable to load the inline backend, use 
z.show(plt):%pythonimport matplotlib.pyplot as pltplt.figure()(.. 
..)z.show(plt)plt.close()The z.show() function can take optional parameters to 
adapt graph dimensions (width and height) as well as output format (png or 
optionally svg).%pythonz.show(plt
 , width='50px')z.show(plt, height='150px', 
fmt='svg')Pandas integrationApache Zeppelin Table Display 
System provides built-in data visualization capabilities. Python interpreter 
leverages it to visualize Pandas DataFrames though similar z.show() API, same 
as with Matplotlib integration.Example:import pandas as pdrates = 
pd.read_csv("bank.csv", 
sep=";")z.show(rates)SQL over Pandas DataFramesThere is a 
convenience %python.sql interpreter that matches Apache Spark experience in 
Zeppelin and enables usage of SQL language to query Pandas DataFrames and 
visualization of results though built-in Table Display 
System.Pre-requestsPandas pip install pandasPandaSQL pip install -U pandasqlIn 
case default binded interpreter is Python (first in the interpreter list, under 
the Gear Icon), you can just use it as %sql i.efirst paragraphimport pandas as 
pdrates = pd.read_csv("bank.csv", sep="
 ;")next paragraph%sqlSELECT * FROM rates WHERE age < 
40Otherwise it can be referred to as %python.sqlIPython SupportIPython is more 
powerful than the default python interpreter with extra functionality. You can 
use IPython with Python2 or Python3 which depends on which python you set 
zeppelin.python.Pre-requests- Jupyter `pip install jupyter`- grpcio `pip 
install grpcio`- protobuf `pip install protobuf`If you already install 
anaconda, then you just need to install grpcio as Jupyter is already included 
in anaconda. For grpcio version >= 1.12.0 you'll also need to 
install protobuf separately.In addition to all basic functions of the python 
interpreter, you can use all the IPython advanced features as you use it in 
Jupyter Notebook.e.g. Use IPython magic%python.ipython#python 
helprange?#timeit%timeit range(100)Use matplotlib %python.ipython%matplotlib 
inlineimport matplotlib.pyplot as pltprint("hello 
world")data=[1,2,3,4]plt.figure()plt.
 plot(data)We also make ZeppelinContext available in IPython Interpreter. You 
can use ZeppelinContext to create dynamic forms and display pandas 
DataFrame.e.g.Create dynamic formz.input(name='my_name', 
defaultValue='hello')Show pandas dataframeimport pandas as pddf 
= pd.DataFrame({'id':[1,2,3], 
'name':['a','b','c']})z.show(df)By
 default, we would use IPython in %python.python if IPython is available. 
Otherwise it would fall back to the original Python implementation.If you 
don't want to use IPython, then you can set zeppelin.python.useIPython 
as false in interpreter setting.Technical descriptionFor in-depth technical 
details on current implementation please refer to python/README.md.Some 
features not yet implemented in the Python InterpreterInterrupt a paragraph 
execution (cancel() method) is currently only supported in Linux and MacOs. If 
interpreter runs in anothe
 r operating system (for instance MS Windows) , interrupt a paragraph will 
close the whole interpreter. A JIRA ticket (ZEPPELIN-893) is opened to 
implement this feature in a next release of the interpreter.Progression bar in 
webUI  (getProgress() method) is currently not implemented.Code-completion is 
currently not implemented.",
+      "content"  : "<!--Licensed under the Apache License, Version 2.0 (the 
"License");you may not use this file except in compliance with the 
License.You may obtain a copy of the License 
athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law 
or agreed to in writing, softwaredistributed under the License is distributed 
on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied.See the License for the specific language governing 
permissions andlimitations under the License.-->Python 2 & 3 
Interpreter for Apache ZeppelinOverviewZeppelin supports python language which 
is very popular in data analytics and machine learning.      Name    Class    
Description        %python    PythonInterpreter    Vanilla python interpreter, 
with least dependencies, only python environment installed is required        
%python.ipython    IPythonInterpreter    Provide more fancy python runtime via 
IPython, almost the s
 ame experience like Jupyter. It requires more things, but is the recommended 
interpreter for using python in Zeppelin, see below        %python.sql    
PythonInterpreterPandasSql    Provide sql capability to query data in Pandas 
DataFrame via pandasql  Configuration      Property    Default    Description   
     zeppelin.python    python    Path of the installed Python binary (could be 
python2 or python3).    You should set this property explicitly if python is 
not in your $PATH(example: /usr/bin/python).            
zeppelin.python.maxResult    1000    Max number of dataframe rows to display.   
     zeppelin.python.useIPython    true    When this property is true, %python 
would be delegated to %python.ipython if IPython is available, otherwise    
IPython is only used in %python.ipython.      Vanilla Python Interpreter 
(%python)The vanilla python interpreter provides basic python interpreter 
feature, only python installed is required.Matplotlib integrationThe vanilla 
python interprete
 r can display matplotlib figures inline automatically using the 
matplotlib:%pythonimport matplotlib.pyplot as pltplt.plot([1, 2, 3])The output 
of this command will by default be converted to HTML by implicitly making use 
of the %html magic. Additional configuration can be achieved using the builtin 
z.configure_mpl() method. For example, z.configure_mpl(width=400, height=300, 
fmt='svg')plt.plot([1, 2, 3])Will produce a 400x300 image in 
SVG format, which by default are normally 600x400 and PNG respectively. In the 
future, another option called angular can be used to make it possible to update 
a plot produced from one paragraph directly from another (the output will be 
%angular instead of %html). However, this feature is already available in the 
pyspark interpreter. More details can be found in the included 
"Zeppelin Tutorial: Python - matplotlib basic" tutorial 
notebook. If Zeppelin cannot find the matplotlib backend files (which should 
usually be fou
 nd in $ZEPPELIN_HOME/interpreter/lib/python) in your PYTHONPATH, then the 
backend will automatically be set to agg, and the (otherwise deprecated) 
instructions below can be used for more limited inline plotting.If you are 
unable to load the inline backend, use z.show(plt):%pythonimport 
matplotlib.pyplot as pltplt.figure()(.. ..)z.show(plt)plt.close()The z.show() 
function can take optional parameters to adapt graph dimensions (width and 
height) as well as output format (png or optionally svg).%pythonz.show(plt, 
width='50px')z.show(plt, height='150px', 
fmt='svg')IPython Interpreter (%python.ipython) 
(recommended)IPython is more powerful than the vanilla python interpreter with 
extra functionality. You can use IPython with Python2 or Python3 which depends 
on which python you set in zeppelin.python.For non-anaconda environment 
Prerequisites- Jupyter `pip install jupyter`- grpcio `pip install grpcio`- 
protobuf `pip install protobuf`For anac
 onda environment (zeppelin.python points to the python under 
anaconda)Prerequisites- grpcio `pip install grpcio`- protobuf `pip install 
protobuf`In addition to all the basic functions of the vanilla python 
interpreter, you can use all the IPython advanced features as you use it in 
Jupyter Notebook.e.g. Use IPython magic%python.ipython#python 
helprange?#timeit%timeit range(100)Use matplotlib%python.ipython%matplotlib 
inlineimport matplotlib.pyplot as pltprint("hello 
world")data=[1,2,3,4]plt.figure()plt.plot(data)Colored text outputMore 
types of visualizatione.g. IPython supports hvplotBetter code completionBy 
default, Zeppelin would use IPython in %python if IPython prerequisites are 
met, otherwise it would use the vanilla Python interpreter in %python.If you 
don't want to use IPython via %python, then you can set 
zeppelin.python.useIPython as false in interpreter setting.Pandas 
integrationApache Zeppelin Table Display System provides built-in data 
visualizatio
 n capabilities. Python interpreter leverages it to visualize Pandas DataFrames 
through a similar z.show() API, same as with Matplotlib 
integration.Example:%pythonimport pandas as pdrates = 
pd.read_csv("bank.csv", 
sep=";")z.show(rates)SQL over Pandas DataFramesThere is a 
convenience %python.sql interpreter that matches Apache Spark experience in 
Zeppelin and enables usage of SQL language to query Pandas DataFrames and 
visualization of results through the built-in Table Display 
System.PrerequisitesPandas pip install pandasPandaSQL pip install -U 
pandasqlHere's one example:first paragraph%pythonimport pandas as 
pdrates = pd.read_csv("bank.csv", sep=";")  
```next paragraph%python.sqlSELECT * FROM rates WHERE age < 40  ```Using 
Zeppelin Dynamic FormsYou can leverage Zeppelin Dynamic Form inside your Python 
code.Example : %python### Input 
formprint(z.input("f1","defaultValue"
 ))### Select 
formprint(z.select("f2",[("o1","1"),("o2","2")],"o1"))###
 Checkbox 
formprint("".join(z.checkbox("f3", 
[("o1","1"), 
("o2","2")],["o1"])))ZeppelinContext
 APIPython interpreter creates a variable z which represents ZeppelinContext for 
you. Users can use it to do more fancy and complex things in Zeppelin.      API  
  Description        z.put(key, value)    Put object value with identifier key 
to distributed resource pool of Zeppelin,     so that it can be used by other 
interpreters        z.get(key)    Get object with identifier key from 
distributed resource pool of Zeppelin        z.remove(key)    Remove object 
with identifier key from distributed resource pool of Zeppelin        
z.getAsDataFrame(key)    Get object with identifier key from distri
 buted resource pool of Zeppelin and converted into pandas dataframe.    The 
object in the distributed resource pool must be table type, e.g. jdbc 
interpreter result.            z.angular(name, noteId = None, paragraphId = 
None)    Get the angular object with identifier name        z.angularBind(name, 
value, noteId = None, paragraphId = None)    Bind value to angular object with 
identifier name        z.angularUnbind(name, noteId = None)    Unbind value 
from angular object with identifier name        z.show(p)    Show python object 
p in Zeppelin, if it is pandas dataframe, it would be displayed in 
Zeppelin's table format,     others will be converted to string          
z.textbox(name, defaultValue="")    Create dynamic form Textbox name 
with defaultValue        z.select(name, options, defaultValue="")    
Create dynamic form Select name with options and defaultValue. options should 
be a list of Tuple(first element is key,     the second element is the displayed
  value) e.g. 
z.select("f2",[("o1","1"),("o2","2")],"o1")
        z.checkbox(name, options, defaultChecked=[])    Create dynamic form 
Checkbox `name` with options and defaultChecked. options should be a list of 
Tuple(first element is key,     the second element is the displayed value) e.g. 
z.checkbox("f3", [("o1","1"), 
("o2","2")],["o1"])        z.noteTextbox(name, 
defaultValue="")    Create note level dynamic form Textbox        
z.noteSelect(name, options, defaultValue="")    Create note level 
dynamic form Select        z.noteCheckbox(name, options, defaultChecked=[])    
Create note level dynamic form Checkbox        z.run(paragraphId)    Run 
paragraph        z.run(noteId, paragraphId)    Run paragraph        
z.runNote(noteId)    Run the whole note  Python environmentsDefaultBy default, 
PythonInterpreter will use python command defined in zeppeli
 n.python property to run python process.The interpreter can use all modules 
already installed (with pip, easy_install...)CondaConda is a package 
management system and environment management system for python.%python.conda 
interpreter lets you change between environments.Usageget the Conda 
Information: %python.conda infolist the Conda environments: %python.conda env 
listcreate a conda enviornment: %python.conda create --name [ENV NAME]activate 
an environment (python interpreter will be restarted): %python.conda activate 
[ENV NAME]deactivate%python.conda deactivateget installed package list inside 
the current environment%python.conda listinstall package%python.conda install 
[PACKAGE NAME]uninstall package%python.conda uninstall [PACKAGE 
NAME]Docker%python.docker interpreter allows PythonInterpreter to create a python 
process in a specified docker container.Usageactivate an 
environment%python.docker activate [Repository]%python.docker activate 
[Repository:Tag]%python.docker activate [Imag
 e Id]deactivate%python.docker deactivateHere is an example# activate latest 
tensorflow image as a python environment%python.docker activate 
gcr.io/tensorflow/tensorflow:latestTechnical descriptionFor in-depth technical 
details on current implementation please refer to python/README.md.Some 
features not yet implemented in the vanilla Python interpreterInterrupt a 
paragraph execution (cancel() method) is currently only supported in Linux and 
MacOs. If interpreter runs in another operating system (for instance MS 
Windows) , interrupt a paragraph will close the whole interpreter. A JIRA 
ticket (ZEPPELIN-893) is opened to implement this feature in a next release of 
the interpreter.Progression bar in webUI  (getProgress() method) is currently 
not implemented.",
       "url": " /interpreter/python.html",
       "group": "interpreter",
       "excerpt": "Python is a programming language that lets you work quickly 
and integrate systems more effectively."
     }
     ,
-    
-  
+
+
 
     "/interpreter/hive.html": {
       "title": "Hive Interpreter for Apache Zeppelin",
@@ -86,8 +108,8 @@
       "excerpt": "Apache Hive data warehouse software facilitates querying and 
managing large datasets residing in distributed storage. Hive provides a 
mechanism to project structure onto this data and query the data using a 
SQL-like language called HiveQL. At the same time this..."
     }
     ,
-    
-  
+
+
 
     "/interpreter/ignite.html": {
       "title": "Ignite Interpreter for Apache Zeppelin",
@@ -97,8 +119,8 @@
       "excerpt": "Apache Ignite in-memory Data Fabric is a high-performance, 
integrated and distributed in-memory platform for computing and transacting on 
large-scale data sets in real-time, orders of magnitude faster than possible 
with traditional disk-based or flash technologies."
     }
     ,
-    
-  
+
+
 
     "/interpreter/groovy.html": {
       "title": "Apache Groovy Interpreter for Apache Zeppelin",
@@ -108,8 +130,19 @@
       "excerpt": "Apache Groovy is a powerful, optionally typed and dynamic 
language, with static-typing and static compilation capabilities, for the Java 
platform aimed at improving developer productivity thanks to a concise, 
familiar and easy to learn syntax."
     }
     ,
-    
-  
+
+
+
+    "/interpreter/jupyter.html": {
+      "title": "Jupyter Interpreter for Apache Zeppelin",
+      "content"  : "<!--Licensed under the Apache License, Version 2.0 (the 
"License");you may not use this file except in compliance with the 
License.You may obtain a copy of the License 
athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law 
or agreed to in writing, softwaredistributed under the License is distributed 
on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied.See the License for the specific language governing 
permissions andlimitations under the License.-->Jupyter Interpreter for 
Apache ZeppelinOverviewProject Jupyter exists to develop open-source software, 
open-standards, and services for interactive computing across dozens of 
programming languages.Zeppelin's Jupyter interpreter is a 
bridge/adapter between Zeppelin interpreter and Jupyter kernel. You can use any 
jupyter kernel as long as you have installed the necessary 
dependencies.ConfigurationTo run any Jupyter kernel in Ze
 ppelin you first need to install the following prerequisite:pip install 
jupyter-clientpip install grpciopip install protobufThen you need install the 
jupyter kernel you want to use. In the following sections, we will talk about 
how to use the following 3 jupyter kernels in Zeppelin:ipythonirjuliaJupyter 
Python kernelIn order to use Jupyter Python kernel in Zeppelin, you need to 
install ipykernel first. pip install ipykernelThen you can run python code in 
Jupyter interpreter like following. %jupyter(kernel=python)%matplotlib 
inlineimport matplotlib.pyplot as pltplt.plot([1, 2, 3])Jupyter R kernelIn 
order to use IRKernel, you need to first install IRkernel package in 
R.install.packages('IRkernel')IRkernel::installspec()  # to 
register the kernel in the current R installationThen you can run r code in 
Jupyter interpreter like following. 
%jupyter(kernel=ir)library(ggplot2)ggplot(mpg, aes(x = displ, y = hwy)) +  
geom_point()Jupyter Julia kernelIn order to use Julia in Zep
 pelin, you first need to install IJulia firstusing 
PkgPkg.add("IJulia")Then you can run julia code in Jupyter 
interpreter like following. %jupyter(kernel=julia-1.3)using 
PkgPkg.add("Plots")using Plotsplotly() # Choose the Plotly.jl 
backend for web interactivityplot(rand(5,5),linewidth=2,title="My 
Plot")Pkg.add("PyPlot") # Install a different 
backendpyplot() # Switch to using the PyPlot.jl 
backendplot(rand(5,5),linewidth=2,title="My Plot")Use any 
other kernelFor any other jupyter kernel, you can follow the below steps to use 
it in Zeppelin.Install the specified jupyter kernel. You can find all the 
available jupyter kernels here Find its kernel name by running the following 
commandbashjupyter kernelspec listRun the kernel as 
following%jupyter(kernel=kernel_name)code",
+      "url": " /interpreter/jupyter.html",
+      "group": "interpreter",
+      "excerpt": "Project Jupyter exists to develop open-source software, 
open-standards, and services for interactive computing across dozens of 
programming languages."
+    }
+    ,
+
+
 
     "/interpreter/sap.html": {
       "title": "SAP BusinessObjects Interpreter for Apache Zeppelin",
@@ -119,8 +152,8 @@
       "excerpt": "SAP BusinessObjects BI platform can simplify the lives of 
business users and IT staff. SAP BusinessObjects is based on universes. The 
universe contains dual-semantic layer model. The users make queries upon 
universes. This interpreter is new interface for universes."
     }
     ,
-    
-  
+
+
 
     "/interpreter/kylin.html": {
       "title": "Apache Kylin Interpreter for Apache Zeppelin",
@@ -130,8 +163,8 @@
       "excerpt": "Apache Kylin™ is an open source Distributed Analytics 
Engine designed to provide SQL interface and multi-dimensional analysis (OLAP) 
on Hadoop supporting extremely large datasets, original contributed from eBay 
Inc. ."
     }
     ,
-    
-  
+
+
 
     "/interpreter/hazelcastjet.html": {
       "title": "Hazelcast Jet interpreter in Apache Zeppelin",
@@ -141,8 +174,8 @@
       "excerpt": "Build and execture Hazelcast Jet computation jobs."
     }
     ,
-    
-  
+
+
 
     "/interpreter/hdfs.html": {
       "title": "HDFS File System Interpreter for Apache Zeppelin",
@@ -152,8 +185,8 @@
       "excerpt": "Hadoop File System is a distributed, fault tolerant file 
system part of the hadoop project and is often used as storage for distributed 
processing engines like Hadoop MapReduce and Apache Spark or underlying file 
systems like Alluxio."
     }
     ,
-    
-  
+
+
 
     "/interpreter/hbase.html": {
       "title": "HBase Shell Interpreter for Apache Zeppelin",
@@ -163,8 +196,8 @@
       "excerpt": "HBase Shell is a JRuby IRB client for Apache HBase. This 
interpreter provides all capabilities of Apache HBase shell within Apache 
Zeppelin."
     }
     ,
-    
-  
+
+
 
     "/interpreter/beam.html": {
       "title": "Beam interpreter in Apache Zeppelin",
@@ -174,8 +207,8 @@
       "excerpt": "Apache Beam is an open source, unified programming model 
that you can use to create a data processing pipeline."
     }
     ,
-    
-  
+
+
 
     "/interpreter/geode.html": {
       "title": "Geode/Gemfire OQL Interpreter for Apache Zeppelin",
@@ -185,19 +218,19 @@
       "excerpt": "Apache Geode (incubating) provides a database-like 
consistency model, reliable transaction processing and a shared-nothing 
architecture to maintain very low latency performance with high concurrency 
processing."
     }
     ,
-    
-  
+
+
 
     "/interpreter/r.html": {
       "title": "R Interpreter for Apache Zeppelin",
-      "content"  : "<!--Licensed under the Apache License, Version 2.0 (the 
"License");you may not use this file except in compliance with the 
License.You may obtain a copy of the License 
athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law 
or agreed to in writing, softwaredistributed under the License is distributed 
on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied.See the License for the specific language governing 
permissions andlimitations under the License.-->R Interpreter for Apache 
ZeppelinOverviewR is a free software environment for statistical computing and 
graphics.To run R code and visualize plots in Apache Zeppelin, you will need R 
on your master node (or your dev laptop).For Centos: yum install R R-devel 
libcurl-devel openssl-develFor Ubuntu: apt-get install r-baseValidate your 
installation with a simple R command:R -e "print(1+1)"To 
enjoy plots, install additi
 onal libraries with:devtools with R -e 
"install.packages('devtools', repos = 
'http://cran.us.r-project.org')"knitr with R -e 
"install.packages('knitr', repos = 
'http://cran.us.r-project.org')"ggplot2 withR -e 
"install.packages('ggplot2', repos = 
'http://cran.us.r-project.org')"Other visualization 
libraries: R -e 
"install.packages(c('devtools','mplot',
 'googleVis'), repos = 
'http://cran.us.r-project.org'); require(devtools); 
install_github('ramnathv/rCharts')"We recommend you to 
also install the following optional R libraries for happy data 
analytics:glmnetpROCdata.tablecaretsqldfwordcloudConfigurationTo run Zeppelin 
with the R Interpreter, the SPARK_HOME environment variable must be set. The 
best way to do this is by editing conf/zeppelin-
 env.sh.If it is not set, the R Interpreter will not be able to interface with 
Spark.You should also copy conf/zeppelin-site.xml.template to 
conf/zeppelin-site.xml. That will ensure that Zeppelin sees the R Interpreter 
the first time it starts up.Using the R InterpreterBy default, the R 
Interpreter appears as two Zeppelin Interpreters, %r and %knitr.%r will behave 
like an ordinary REPL.  You can execute commands as in the CLI.   R base 
plotting is fully supportedIf you return a data.frame, Zeppelin will attempt to 
display it using Zeppelin's built-in visualizations.%knitr interfaces 
directly against knitr, with chunk options on the first line:The two 
interpreters share the same environment.  If you define a variable from %r, it 
will be within-scope if you then make a call using knitr.Using SparkR & 
Moving Between LanguagesIf SPARK_HOME is set, the SparkR package will be loaded 
automatically:The Spark Context and SQL Context are created and injected into 
the local envi
 ronment automatically as sc and sql.The same context are shared with the 
%spark, %sql and %pyspark interpreters:You can also make an ordinary R variable 
accessible in scala and Python:And vice versa:Caveats & 
TroubleshootingAlmost all issues with the R interpreter turned out to be caused 
by an incorrectly set SPARK_HOME.  The R interpreter must load a version of the 
SparkR package that matches the running version of Spark, and it does this by 
searching SPARK_HOME. If Zeppelin isn't configured to interface with 
Spark in SPARK_HOME, the R interpreter will not be able to connect to Spark.The 
knitr environment is persistent. If you run a chunk from Zeppelin that changes 
a variable, then run the same chunk again, the variable has already been 
changed.  Use immutable variables.(Note that %spark.r and %r are two different 
ways of calling the same interpreter, as are %spark.knitr and %knitr. By 
default, Zeppelin puts the R interpreters in the %spark. Interpreter 
Group.Using 
 the %r interpreter, if you return a data.frame, HTML, or an image, it will 
dominate the result. So if you execute three commands, and one is hist(), all 
you will see is the histogram, not the results of the other commands. This is a 
Zeppelin limitation.If you return a data.frame (for instance, from calling 
head()) from the %spark.r interpreter, it will be parsed by Zeppelin's 
built-in data visualization system.  Why knitr Instead of rmarkdown?  Why no 
htmlwidgets?  In order to support htmlwidgets, which has indirect dependencies, 
rmarkdown uses pandoc, which requires writing to and reading from disc.  This 
makes it many times slower than knitr, which can operate entirely in RAM.Why no 
ggvis or shiny?  Supporting shiny would require integrating a reverse-proxy 
into Zeppelin, which is a task.Max OS X & case-insensitive filesystem.  
If you try to install on a case-insensitive filesystem, which is the Mac OS X 
default, maven can unintentionally delete the install directo
 ry because r and R become the same subdirectory.Error unable to start device 
X11 with the repl interpreter.  Check your shell login scripts to see if they 
are adjusting the DISPLAY environment variable.  This is common on some 
operating systems as a workaround for ssh issues, but can interfere with R 
plotting.akka Library Version or TTransport errors.  This can happen if you try 
to run Zeppelin with a SPARK_HOME that has a version of Spark other than the 
one specified with -Pspark-1.x when Zeppelin was compiled.",
+      "content"  : "<!--Licensed under the Apache License, Version 2.0 (the 
"License");you may not use this file except in compliance with the 
License.You may obtain a copy of the License 
athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law 
or agreed to in writing, softwaredistributed under the License is distributed 
on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied.See the License for the specific language governing 
permissions andlimitations under the License.-->R Interpreter for Apache 
ZeppelinOverviewR is a free software environment for statistical computing and 
graphics.To run R code and visualize plots in Apache Zeppelin, you will need R 
on your master node (or your dev laptop).For Centos: yum install R R-devel 
libcurl-devel openssl-develFor Ubuntu: apt-get install r-baseValidate your 
installation with a simple R command:R -e "print(1+1)"To 
enjoy plots, install additi
 onal libraries with:devtools with R -e 
"install.packages('devtools', repos = 
'http://cran.us.r-project.org')"knitr with R -e 
"install.packages('knitr', repos = 
'http://cran.us.r-project.org')"ggplot2 withR -e 
"install.packages('ggplot2', repos = 
'http://cran.us.r-project.org')"Other visualization 
libraries: R -e 
"install.packages(c('devtools','mplot',
 'googleVis'), repos = 
'http://cran.us.r-project.org'); require(devtools); 
install_github('ramnathv/rCharts')"We recommend you to 
also install the following optional R libraries for happy data 
analytics:glmnetpROCdata.tablecaretsqldfwordcloudSupported InterpretersZeppelin 
supports the R language with 3 interpreters      Name    Class    Description        
%r.r    RInterpreter    Vanilla R interpreter, with the fewest dependencies; only an 
installed R environment is required.    It is 
always recommended to use the fully qualified interpreter name %r.r, 
because %r is ambiguous: it could mean either %spark.r or %r.r        %r.ir  
  IRInterpreter    Provides a richer R runtime via 
[IRKernel](https://github.com/IRkernel/IRkernel), almost the same experience 
as using R in Jupyter. It requires more dependencies, but is the recommended 
interpreter for using R in Zeppelin.        %r.shiny    ShinyInterpreter    Runs a 
Shiny app in Zeppelin  If you want to use R with Spark, usage is almost the same 
via %spark.r, %spark.ir &amp; %spark.shiny. You can refer to the Spark 
Interpreter docs for more details.Configuration      Property    Default    
Description        zeppelin.R.cmd    R    Path of the installed R binary. You 
should set this property explicitly if R is not in your $PATH (example: 
/usr/bin/R).            zeppelin.R.knitr    true    Whether to use knitr or 
not. It is recommended to insta
 ll [knitr](https://yihui.org/knitr/)        zeppelin.R.image.width    100%    
Image width of R plotting        zeppelin.R.shiny.iframe_width    100%    
IFrame width of Shiny App        zeppelin.R.shiny.iframe_height    500px    
IFrame height of Shiny App  Using the R Interpreter(%r.r & %r.ir)By 
default, the R Interpreter appears as two Zeppelin Interpreters, %r.r and 
%r.ir.%r.r behaves like an ordinary REPL and uses SparkR to communicate between 
the R process and the JVM process.%r.ir uses IRKernel underneath; it behaves like 
using IRKernel in a Jupyter notebook.  R basic expressionR base plotting is fully 
supportedBesides R base plotting, you can use other visualization libraries, e.g. 
ggplot2 and googleVis Make Shiny App in ZeppelinShiny is an R package that makes 
it easy to build interactive web applications (apps) straight from R.For 
developing one Shiny App in Zeppelin, you need at least 3 paragraphs (server 
paragraph, ui paragraph and run type paragraph)Server type R shiny paragra
 ph%r.shiny(type=server)# Define server logic to summarize and view selected 
dataset ----server <- function(input, output) {    # Return the 
requested dataset ----    datasetInput <- reactive({        
switch(input$dataset,        "rock" = rock,        
"pressure" = pressure,        "cars" = 
cars)    })    # Generate a summary of the dataset ----    output$summary 
<- renderPrint({        dataset <- datasetInput()        
summary(dataset)    })    # Show the first "n" observations 
----    output$view <- renderTable({        head(datasetInput(), n = 
input$obs)    })}UI type R shiny paragraph%r.shiny(type=ui)# Define UI for 
dataset viewer app ----ui <- fluidPage(    # App title ----    
titlePanel("Shiny Text"),    # Sidebar layout with a input 
and output definitions ----    sidebarLayout(        # Sidebar panel for inputs 
----        sidebarPanel(        # Inp
 ut: Selector for choosing dataset ----        selectInput(inputId = 
"dataset",        label = "Choose a 
dataset:",        choices = c("rock", 
"pressure", "cars")),        # Input: 
Numeric entry for number of obs to view ----        numericInput(inputId = 
"obs",        label = "Number of observations to 
view:",        value = 10)        ),        # Main panel for 
displaying outputs ----        mainPanel(        # Output: Verbatim text for 
data summary ----        verbatimTextOutput("summary"),       
 # Output: HTML table with requested number of observations ----        
tableOutput("view")        )    ))Run type R shiny 
paragraph%r.shiny(type=run)After executing the run type R shiny paragraph, the 
shiny app will be launched and embedded as an iframe in the paragraph.Run multiple 
shiny appsIf you want to run multiple shiny apps, you can specify app in the 
paragraph's local properties to differentiate the shiny 
apps, e.g.%r.shiny(type=ui, app=app_1)%r.shiny(type=server, 
app=app_1)%r.shiny(type=run, app=app_1)",
       "url": " /interpreter/r.html",
       "group": "interpreter",
       "excerpt": "R is a free software environment for statistical computing 
and graphics."
     }
     ,
-    
-  
+
+
 
     "/interpreter/java.html": {
       "title": "Java interpreter in Apache Zeppelin",
@@ -207,19 +240,19 @@
       "excerpt": "Run Java code and any distributed java computation library 
by importing the dependencies in the interpreter configuration."
     }
     ,
-    
-  
+
+
 
     "/interpreter/flink.html": {
       "title": "Flink Interpreter for Apache Zeppelin",
-      "content"  : "<!--Licensed under the Apache License, Version 2.0 (the 
"License");you may not use this file except in compliance with the 
License.You may obtain a copy of the License 
athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law 
or agreed to in writing, softwaredistributed under the License is distributed 
on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied.See the License for the specific language governing 
permissions andlimitations under the License.-->Flink interpreter for Apache 
ZeppelinOverviewApache Flink is an open source platform for distributed stream 
and batch data processing. Flink's core is a streaming dataflow engine that 
provides data distribution, communication, and fault tolerance for distributed 
computations over data streams. Flink also builds batch processing on top of 
the streaming engine, overlaying native iteration support, managed memory, and 
program opt
 imization.Apache Flink is supported in Zeppelin with Flink interpreter group 
which consists of below five interpreters.      Name    Class    Description    
    %flink    FlinkInterpreter    Creates 
ExecutionEnvironment/StreamExecutionEnvironment/BatchTableEnvironment/StreamTableEnvironment
 and provides a Scala environment        %flink.pyflink    PyFlinkInterpreter   
 Provides a python environment        %flink.ipyflink    IPyFlinkInterpreter    
Provides an ipython environment        %flink.ssql    FlinkStreamSqlInterpreter 
   Provides a stream sql environment        %flink.bsql    
FlinkBatchSqlInterpreter    Provides a batch sql environment  ConfigurationThe 
Flink interpreter can be configured with properties provided by Zeppelin.You 
can also set other flink properties which are not listed in the table. For a 
list of additional properties, refer to Flink Available Properties.      
Property    Default    Description        FLINK_HOME        Location of flink 
installation. It is mus
 t be specified, otherwise you can not use flink in zeppelin        
flink.execution.mode    local    Execution mode of flink, e.g. 
local/yarn/remote        flink.execution.remote.host        jobmanager hostname 
if it is remote mode        flink.execution.remote.port        jobmanager port 
if it is remote mode        flink.jm.memory    1024    Total number of 
memory(mb) of JobManager        flink.tm.memory    1024    Total number of 
memory(mb) of TaskManager        flink.tm.num    2    Number of TaskManager     
   flink.tm.slot    1    Number of slot per TaskManager        
flink.yarn.appName    Zeppelin Flink Session    Yarn app name        
flink.yarn.queue        queue name of yarn app        flink.yarn.jars        
additional user jars (comma separated)        zeppelin.flink.scala.color    
true    whether display scala shell output in colorful format        
zeppelin.flink.enableHive    false    whether enable hive        
zeppelin.flink.printREPLOutput    true    Print REPL output    
     zeppelin.flink.maxResult    1000    max number of row returned by sql 
interpreter        zeppelin.flink.planner    blink    planner or flink table 
api, blink or flink        zeppelin.pyflink.python    python    python 
executable for pyflink  StreamExecutionEnvironment, ExecutionEnvironment, 
StreamTableEnvironment, BatchTableEnvironmentZeppelin will create 4 variables 
to represent flink's entrypoint:* senv    (StreamExecutionEnvironment), 
* env     (ExecutionEnvironment)* stenv   (StreamTableEnvironment) * btenv   
(BatchTableEnvironment)ZeppelinContextZeppelin automatically injects 
ZeppelinContext as variable z in your Scala/Python environment. ZeppelinContext 
provides some additional functions and utilities.See Zeppelin-Context for more 
details.IPython supportBy default, zeppelin would use IPython in pyflink when 
IPython is available, Otherwise it would fall back to the original PyFlink 
implementation.If you don't want to use IPython, then you can set 
zeppelin.py
 flink.useIPython as false in interpreter setting. For the IPython features, 
you can refer docPython Interpreter",
+      "content"  : "<!--Licensed under the Apache License, Version 2.0 (the 
"License");you may not use this file except in compliance with the 
License.You may obtain a copy of the License 
athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law 
or agreed to in writing, softwaredistributed under the License is distributed 
on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied.See the License for the specific language governing 
permissions andlimitations under the License.-->Flink interpreter for Apache 
ZeppelinOverviewApache Flink is an open source platform for distributed stream 
and batch data processing. Flink's core is a streaming dataflow engine that 
provides data distribution, communication, and fault tolerance for distributed 
computations over data streams. Flink also builds batch processing on top of 
the streaming engine, overlaying native iteration support, managed memory, and 
program opt
 imization.In Zeppelin 0.9, we refactored the Flink interpreter to 
support the latest version of Flink. Only Flink 1.10+ is supported; older versions 
of Flink may not work.Apache Flink is supported in Zeppelin with the Flink 
interpreter group, which consists of the five interpreters below.      Name    Class 
   Description        %flink    FlinkInterpreter    Creates 
ExecutionEnvironment/StreamExecutionEnvironment/BatchTableEnvironment/StreamTableEnvironment
 and provides a Scala environment        %flink.pyflink    PyFlinkInterpreter   
 Provides a python environment        %flink.ipyflink    IPyFlinkInterpreter    
Provides an ipython environment        %flink.ssql    FlinkStreamSqlInterpreter 
   Provides a stream sql environment        %flink.bsql    
FlinkBatchSqlInterpreter    Provides a batch sql environment  
PrerequisitesDownload Flink 1.10 for scala 2.11 (Only scala-2.11 is supported, 
scala-2.12 is not supported yet in Zeppelin)Download flink-hadoop-shaded and 
put it under the lib folder of flink (the flink interpreter needs it to support yarn mode)ConfigurationThe 
Flink interpreter can be configured with properties provided by Zeppelin (as 
following table).You can also set other flink properties which are not listed 
in the table. For a list of additional properties, refer to Flink Available 
Properties.      Property    Default    Description        FLINK_HOME        
Location of flink installation. It must be specified, otherwise you can not 
use flink in Zeppelin        HADOOP_CONF_DIR        Location of hadoop conf; 
this must be set if running in yarn mode        HIVE_CONF_DIR        
Location of hive conf; this must be set if you want to connect to the hive 
metastore        flink.execution.mode    local    Execution mode of flink, e.g. 
local | yarn | remote        flink.execution.remote.host        Host name of 
running JobManager. Only used for remote mode        
flink.execution.remote.port        Port of running JobManager. Only used for 
remote mode        fl
 ink.jm.memory    1024    Total number of memory(mb) of JobManager        
flink.tm.memory    1024    Total number of memory(mb) of TaskManager        
flink.tm.slot    1    Number of slot per TaskManager        
local.number-taskmanager    4    Total number of TaskManagers in local mode     
   flink.yarn.appName    Zeppelin Flink Session    Yarn app name        
flink.yarn.queue        queue name of yarn app        flink.webui.yarn.useProxy 
   false    whether use yarn proxy url as flink weburl, e.g. 
http://localhost:8088/proxy/application15833965980680004        flink.udf.jars  
      Flink udf jars (comma separated), zeppelin will register udf in this jar 
automatically for user. The udf name is the class name.        
flink.execution.jars        Additional user jars (comma separated)        
flink.execution.packages        Additional user packages (comma separated), 
e.g. 
org.apache.flink:flink-connector-kafka2.11:1.10,org.apache.flink:flink-connector-kafka-base2.11:1.10.0,org.apache.flin
 k:flink-json:1.10.0        zeppelin.flink.concurrentBatchSql.max    10    Max 
concurrent sql of Batch Sql (%flink.bsql)        
zeppelin.flink.concurrentStreamSql.max    10    Max concurrent sql of Stream 
Sql (%flink.ssql)        zeppelin.pyflink.python    python    Python binary 
executable for PyFlink        table.exec.resource.default-parallelism    1    
Default parallelism for flink sql job        zeppelin.flink.scala.color    true 
   Whether to display scala shell output in colorful format        
zeppelin.flink.enableHive    false    Whether to enable hive        
zeppelin.flink.hive.version    2.3.4    Hive version that you would like to 
connect        zeppelin.flink.maxResult    1000    max number of row returned 
by sql interpreter        flink.interpreter.close.shutdown_cluster    true    
Whether shutdown application when closing interpreter        
zeppelin.interpreter.close.cancel_job    true    Whether cancel flink jo
 b when closing interpreter  StreamExecutionEnvironment, ExecutionEnvironment, 
StreamTableEnvironment, BatchTableEnvironmentZeppelin will create 6 variables 
as flink scala (%flink) entry point:senv    (StreamExecutionEnvironment), benv  
   (ExecutionEnvironment)stenv   (StreamTableEnvironment for blink planner) 
btenv   (BatchTableEnvironment for blink planner)stenv_2   
(StreamTableEnvironment for flink planner) btenv_2   (BatchTableEnvironment for 
flink planner)And will create 6 variables as pyflink (%flink.pyflink or 
%flink.ipyflink) entry point:s_env    (StreamExecutionEnvironment), b_env     
(ExecutionEnvironment)st_env   (StreamTableEnvironment for blink planner) 
bt_env   (BatchTableEnvironment for blink planner)st_env_2   
(StreamTableEnvironment for flink planner) bt_env_2   (BatchTableEnvironment 
for flink planner)Execution mode (Local/Remote/Yarn)Flink in Zeppelin supports 
3 execution modes (flink.execution.mode):LocalRemoteYarnRun Flink in Local 
ModeRunning Flink in Local mod
 e will start a MiniCluster in local JVM. By default, the local MiniCluster 
will use port 8081, so make sure this port is available in your 
machine,otherwise you can configure rest.port to specify another port. You can 
also specify local.number-taskmanager and flink.tm.slot to customize the number 
of TM and the number of slots per TM, because by default there are only 4 TMs with 1 
slot each, which may not be enough for some cases.Run Flink in Remote ModeRunning 
Flink in remote mode will connect to an existing flink cluster, which could be a 
standalone cluster or a yarn session cluster. Besides setting 
flink.execution.mode to remote, you also need to 
specify flink.execution.remote.host and flink.execution.remote.port to point to 
the flink job manager.Run Flink in Yarn ModeIn order to run flink in Yarn mode, you 
need to make the following settings:Set flink.execution.mode to yarnSet 
HADOOP_CONF_DIR in flink's interpreter setting.Make sure hadoop command 
is in your PATH, because internally flink will
  call command hadoop classpath and load all the hadoop related jars in the 
 flink interpreter processBlink/Flink PlannerThere are 2 planners 
supported by Flink's table api: flink &amp; blink.If you want to 
use the DataSet api and convert it to a flink table, then please use the flink planner 
(btenv_2 and stenv_2).In other cases, we would always recommend the 
blink planner. This is also what the flink batch/streaming sql interpreters use 
(%flink.bsql &amp; %flink.ssql)How to use HiveIn order to use Hive in 
Flink, you have to make the following settings.Set zeppelin.flink.enableHive to 
be trueSet zeppelin.flink.hive.version to be the hive version you are using.Set 
HIVE_CONF_DIR to be the location where hive-site.xml is located. Make sure hive 
metastore is started and you have configured hive.metastore.uris in 
hive-site.xmlCopy the following dependencies to the lib folder of flink 
installation. 
flink-connector-hive_2.11-1.10.0.jar, flink-hadoop-compatibility_2.11-1.10.0.jar, 
hive-exec-2.x.jar (for hive 1.x, you need to copy 
hive-exec-1.x.jar, hive-metastore-1.x.jar, libfb303-0.9.2.jar and 
libthrift-0.9.2.jar)After these settings, you will be able to query hive tables 
via either table api %flink or batch sql %flink.bsqlFlink Batch SQL%flink.bsql 
is used for flink's batch sql. You just type help to get all the 
available commands.Use insert into statement for batch ETLUse select statement 
for exploratory data analytics Flink Streaming SQL%flink.ssql is used for 
flink's streaming sql. You just type help to get all the available 
commands. Mainly there are 2 cases:Use insert into statement for 
streaming processingUse select statement for streaming data analyticsFlink 
UDFYou can use Flink scala UDF or Python UDF in sql. UDF for batch and 
streaming sql is the same. Here are 2 examples.Scala UDF%flinkclass 
ScalaUpper extends ScalarFunction {  def eval(str: String) = 
str.toUpperCase}btenv.registerFunction("sca
 la_upper", new ScalaUpper())Python UDF%flink.pyflinkclass 
PythonUpper(ScalarFunction):  def eval(self, s):    return 
s.upper()bt_env.register_function("python_upper", 
udf(PythonUpper(), DataTypes.STRING(), DataTypes.STRING()))Besides defining udf 
in Zeppelin, you can also load udfs in jars via flink.udf.jars. For example, 
you can create udfs in IntelliJ and then build these udfs into one jar. After that 
you can point flink.udf.jars to this jar, and the flink interpreter will detect 
all the udfs in this jar and register them to TableEnvironment; the udf 
name is the class name.ZeppelinContextZeppelin automatically injects 
ZeppelinContext as variable z in your Scala/Python environment. ZeppelinContext 
provides some additional functions and utilities.See Zeppelin-Context for more 
details.IPython SupportBy default, zeppelin would use IPython in %flink.pyflink 
when IPython is available; otherwise it would fall back to the original python 
implementation.For the
  IPython features, you can refer to the Python Interpreter doc.Tutorial NotesZeppelin 
is shipped with several Flink tutorial notes which may be helpful for you. 
Apart from the first one, the 4 notes below cover the 4 main scenarios of 
flink.Flink BasicBatch ETLExploratory Data AnalyticsStreaming ETLStreaming Data 
Analytics",
       "url": " /interpreter/flink.html",
       "group": "interpreter",
       "excerpt": "Apache Flink is an open source platform for distributed 
stream and batch data processing."
     }
     ,
-    
-  
+
+
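[Editor's reading aid, not part of the search-index data: the flink.html entry above embeds a Python UDF snippet for %flink.pyflink; the sketch below spells it out with the imports it relies on. In Zeppelin, bt_env is the blink-planner BatchTableEnvironment injected by the interpreter; the explicit environment creation here is only an assumption for running the same code outside Zeppelin.]

from pyflink.table import BatchTableEnvironment, DataTypes, EnvironmentSettings
from pyflink.table.udf import ScalarFunction, udf

# Outside Zeppelin you create the table environment yourself;
# inside a %flink.pyflink paragraph, Zeppelin already provides bt_env.
settings = EnvironmentSettings.new_instance().in_batch_mode().use_blink_planner().build()
bt_env = BatchTableEnvironment.create(environment_settings=settings)

class PythonUpper(ScalarFunction):
    # Same eval() body as in the indexed flink.html content.
    def eval(self, s):
        return s.upper()

# Register the UDF so batch SQL (%flink.bsql) queries can call python_upper(...).
bt_env.register_function(
    "python_upper",
    udf(PythonUpper(), DataTypes.STRING(), DataTypes.STRING()))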
 
     "/interpreter/postgresql.html": {
       "title": "PostgreSQL, Apache HAWQ (incubating) Interpreter for Apache 
Zeppelin",
@@ -229,8 +262,19 @@
       "excerpt": "Apache Zeppelin supports PostgreSQL, Apache HAWQ(incubating) 
and Greenplum SQL data processing engines."
     }
     ,
-    
-  
+
+
+
+    "/interpreter/sparql.html": {
+      "title": "SPARQL Interpreter for Apache Zeppelin",
+      "content"  : "<!--Licensed under the Apache License, Version 2.0 (the 
"License");you may not use this file except in compliance with the 
License.You may obtain a copy of the License 
athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law 
or agreed to in writing, softwaredistributed under the License is distributed 
on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
either express or implied.See the License for the specific language governing 
permissions andlimitations under the License.-->SPARQL Interpreter for 
Apache ZeppelinOverviewSPARQL is an RDF query language able to retrieve and 
manipulate data stored in Resource Description Framework (RDF) format.Apache 
Zeppelin for now only supports Apache Jena to query SPARQL-Endpoints.To query 
your endpoint configure it in the Interpreter-Settings and use the %sparql 
interpreter.Then write your query in the paragraph.If you want the prefixes to 
replace the URI's, 
 set the replaceURIs setting.Configuration      Name    Default Value    
Description        sparql.engine    jena    The sparql engine to use for the 
queries        sparql.endpoint    http://dbpedia.org/sparql    Complete URL of 
the endpoint        sparql.replaceURIs    true    Replace the URIs in the 
result with the prefixes        sparql.removeDatatypes    true    Remove the 
datatypes from Literals so Zeppelin can use the values  
ExampleAcknowledgementThis work was partially supported by the Bavarian State 
Ministry of Economic Affairs, Regional Development and Energy within the 
framework of the Bavarian Research and Development Program "Information 
and Communication Technology".",
+      "url": " /interpreter/sparql.html",
+      "group": "interpreter",
+      "excerpt": "SPARQL is an RDF query language able to retrieve and 
manipulate data stored in Resource Description Framework (RDF) format. Apache 
Zeppelin uses Apache Jena"
+    }
+    ,
+
+
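[Editor's reading aid, not part of the search-index data: the %sparql interpreter described above sends the paragraph's query to the endpoint configured via sparql.endpoint (default http://dbpedia.org/sparql). The sketch below shows an equivalent request over the standard SPARQL HTTP protocol using the requests library; this is an illustration only, not how the Jena-based interpreter works internally.]

import requests

ENDPOINT = "http://dbpedia.org/sparql"   # value of the sparql.endpoint property
QUERY = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?label WHERE {
  <http://dbpedia.org/resource/Apache_Zeppelin> rdfs:label ?label .
} LIMIT 5
"""

# A SPARQL endpoint accepts the query as the 'query' parameter and can return
# JSON results when asked for application/sparql-results+json.
resp = requests.get(
    ENDPOINT,
    params={"query": QUERY},
    headers={"Accept": "application/sparql-results+json"},
    timeout=30,
)
resp.raise_for_status()
for binding in resp.json()["results"]["bindings"]:
    print(binding["label"]["value"])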
 
     "/interpreter/cassandra.html": {
       "title": "Cassandra CQL Interpreter for Apache Zeppelin",
@@ -240,8 +284,8 @@
       "excerpt": "Apache Cassandra database is the right choice when you need 
scalability and high availability without compromising performance."
     }
     ,
-    
-  
+
+
 
     "/interpreter/lens.html": {
       "title": "Lens Interpreter for Apache Zeppelin",
@@ -251,8 +295,8 @@
       "excerpt": "Apache Lens provides an Unified Analytics interface. Lens 
aims to cut the Data Analytics silos by providing a single view of data across 
multiple tiered data stores and optimal execution environment for the 
analytical query. It seamlessly integrates Hadoop with..."
     }
     ,
-    
-  
+
+
 
     "/interpreter/elasticsearch.html": {
       "title": "Elasticsearch Interpreter for Apache Zeppelin",
@@ -262,8 +306,8 @@
       "excerpt": "Elasticsearch is a highly scalable open-source full-text 
search and analytics engine."
     }
     ,
-    
-  
+
+
 
     "/interpreter/jdbc.html": {
       "title": "Generic JDBC Interpreter for Apache Zeppelin",
@@ -273,8 +317,8 @@
       "excerpt": "Generic JDBC Interpreter lets you create a JDBC connection 
to any data source. You can use Postgres, MySql, MariaDB, Redshift, Apache 
Hive, Apache Phoenix, Apache Drill and Apache Tajo using JDBC interpreter."
     }
     ,
-    
-  
+
+
 
     "/interpreter/neo4j.html": {
       "title": "Neo4j Interpreter for Apache Zeppelin",
@@ -284,8 +328,8 @@
       "excerpt": "Neo4j is a native graph database, designed to store and 
process graphs from bottom to top."
     }
     ,
-    
-  
+
+
 
     "/interpreter/bigquery.html": {
       "title": "BigQuery Interpreter for Apache Zeppelin",
@@ -295,8 +339,8 @@

[... 793 lines stripped ...]
