Modified: zeppelin/site/docs/0.9.0-SNAPSHOT/search_data.json URL: http://svn.apache.org/viewvc/zeppelin/site/docs/0.9.0-SNAPSHOT/search_data.json?rev=1876141&r1=1876140&r2=1876141&view=diff ============================================================================== --- zeppelin/site/docs/0.9.0-SNAPSHOT/search_data.json (original) +++ zeppelin/site/docs/0.9.0-SNAPSHOT/search_data.json Sun Apr 5 05:06:30 2020 @@ -1,5 +1,5 @@ { - + "/interpreter/livy.html": { "title": "Livy Interpreter for Apache Zeppelin", @@ -9,8 +9,19 @@ "excerpt": "Livy is an open source REST interface for interacting with Spark from anywhere. It supports executing snippets of code or programs in a Spark context that runs locally or in YARN." } , - - + + + + "/interpreter/ksql.html": { + "title": "KSQL Interpreter for Apache Zeppelin", + "content" : "<!--Licensed under the Apache License, Version 2.0 (the "License");you may not use this file except in compliance with the License.You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.-->KSQL Interpreter for Apache ZeppelinOverviewKSQL is the streaming SQL engine for Apache Kafka®. It provides an easy-to-use yet powerful interactive SQL interface for stream processing on Kafka,Configuration Property Default Description ksql.url http://localhost:8080 The KSQL Endpoint base URL N.b. The interpreter supports all the KSQL properties, i.e. ksql.streams.auto.offset. reset.The full list of KSQL parameters is here.Using the KSQL InterpreterIn a paragraph, use %ksql and start your SQL query in order to start to interact with KSQL.Following some examples:%ksqlPRINT &#39;orders&#39;;%ksqlCREATE STREAM ORDERS WITH (VALUE_FORMAT=&#39;AVRO&#39;, KAFKA_TOPIC =&#39;orders&#39;);%ksqlSELECT *FROM ORDERSLIMIT 10", + "url": " /interpreter/ksql.html", + "group": "interpreter", + "excerpt": "SQL is the streaming SQL engine for Apache Kafka and provides an easy-to-use yet powerful interactive SQL interface for stream processing on Kafka." + } + , + + "/interpreter/pig.html": { "title": "Pig Interpreter for Apache Zeppelin", @@ -20,8 +31,8 @@ "excerpt": "Apache Pig is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs." } , - - + + "/interpreter/markdown.html": { "title": "Markdown Interpreter for Apache Zeppelin", @@ -31,8 +42,8 @@ "excerpt": "Markdown is a plain text formatting syntax designed so that it can be converted to HTML. Apache Zeppelin uses markdown4j." } , - - + + "/interpreter/submarine.html": { "title": "Apache Hadoop Submarine Interpreter for Apache Zeppelin", @@ -42,8 +53,8 @@ "excerpt": "Hadoop Submarine is the latest machine learning framework subproject in the Hadoop 3.1 release. It allows Hadoop to support Tensorflow, MXNet, Caffe, Spark, etc." } , - - + + "/interpreter/mahout.html": { "title": "Mahout Interpreter for Apache Zeppelin", @@ -53,30 +64,41 @@ "excerpt": "Apache Mahout provides a unified API (the R-Like Scala DSL) for quickly creating machine learning algorithms on a variety of engines." 
} , - - + + + + "/interpreter/kotlin.html": { + "title": "Kotlin interpreter in Apache Zeppelin", + "content" : "<!--Licensed under the Apache License, Version 2.0 (the "License");you may not use this file except in compliance with the License.You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.-->Kotlin interpreter for Apache ZeppelinOverviewKotlin is a cross-platform, statically typed, general-purpose programming language with type inference.It is designed to interoperate fully with Java, and the JVM version of its standard library depends on the Java Class Library, but type inference allows its syntax to be more concise.Configuration Name Default Description zeppelin.kot lin.maxResult 1000 Max n zeppelin.kotlin.shortenTypes true Display shortened types instead of full, e.g. Int vs kotlin.Int Example%kotlin fun square(n: Int): Int = n * nKotlin ContextKotlin context is accessible via kc object bound to the interpreter. It holds vars and functions fields that return all user-defined variables and functions present in the interpreter.You can also print variables or functions by calling kc.showVars() or kc.showFunctions().Examplefun square(n: Int): Int = n * nval greeter = { s: String -&gt; println(&quot;Hello $s!&quot;) }val l = listOf(&quot;Drive&quot;, &quot;to&quot;, &quot;develop&quot;)kc.showVars()kc.showFunctions()Output:l: List&lt;String&gt; = [Drive, to, develop]greeter: (String) -&gt; Unit = (kotlin.String) -&gt; kotlin.Unitfun square(Int): Int", + "url": " /interpreter/kotlin.html", + "group": "interpreter", + "excerpt": "Kotlin is a cross-platform, statically typed, general-purpose programming language with type inference." + } + , + + "/interpreter/spark.html": { "title": "Apache Spark Interpreter for Apache Zeppelin", - "content" : "<!--Licensed under the Apache License, Version 2.0 (the "License");you may not use this file except in compliance with the License.You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.-->Spark Interpreter for Apache ZeppelinOverviewApache Spark is a fast and general-purpose cluster computing system.It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs.Apache Spark is supported in Zeppelin with Spark interpreter group which consists of below five interpreters. Name Class Description %spark SparkInterpreter Creates a SparkConte xt and provides a Scala environment %spark.pyspark PySparkInterpreter Provides a Python environment %spark.r SparkRInterpreter Provides an R environment with SparkR support %spark.sql SparkSQLInterpreter Provides a SQL environment %spark.dep DepInterpreter Dependency loader ConfigurationThe Spark interpreter can be configured with properties provided by Zeppelin.You can also set other Spark properties which are not listed in the table. For a list of additional properties, refer to Spark Available Properties. 
Property Default Description args Spark commandline args master local[*] Spark master uri. ex) spark://masterhost:7077 spark.app.name Zeppelin The name of spark application. spark.cores.max Total number of cores to use. Empty value uses all available core. spark.executor.memory 1g Executor memory per worker instance. ex) 512m, 32g zeppelin.dep .additionalRemoteRepository spark-packages, http://dl.bintray.com/spark-packages/maven, false; A list of id,remote-repository-URL,is-snapshot; for each remote repository. zeppelin.dep.localrepo local-repo Local repository for dependency loader PYSPARK_PYTHON python Python binary executable to use for PySpark in both driver and workers (default is python). Property spark.pyspark.python take precedence if it is set PYSPARK_DRIVER_PYTHON python Python binary executable to use for PySpark in driver only (default is PYSPARK_PYTHON). Property spark.pyspark.driver.python take precedence if it is set zeppelin.spark.concurrentSQL false Execute multiple SQL concurrently if set true. zeppelin.spark.concurrentSQL.max 10 Max number of SQL concurrently executed zeppelin.spark.maxResult 1000 Max number of Spark SQL result to display. zeppelin.spark.printREPLOutput true Print RE PL output zeppelin.spark.useHiveContext true Use HiveContext instead of SQLContext if it is true. zeppelin.spark.importImplicit true Import implicits, UDF collection, and sql if set true. zeppelin.spark.enableSupportedVersionCheck true Do not change - developer only setting, not for production use zeppelin.spark.sql.interpolation false Enable ZeppelinContext variable interpolation into paragraph text zeppelin.spark.uiWebUrl Overrides Spark UI default URL. Value should be a full URL (ex: http://{hostName}/{uniquePath} zeppelin.spark.scala.color true Whether to enable color output of spark scala interpreter Without any configuration, Spark interpreter works out of box in local mode. But if you want to connect to your Spark cluster, you&#39;ll need to follow below two simple steps.1. Export SPARK_HOMEIn conf/zeppelin-env.sh, export SPARK_HOME environment variable with your Spark installation path.For example, export SPARK_HOME=/usr/lib/sparkYou can optionally set more environment variables# set hadoop conf direxport HADOOP_CONF_DIR=/usr/lib/hadoop# set options to pass spark-submit commandexport SPARK_SUBMIT_OPTIONS=&quot;--packages com.databricks:spark-csv_2.10:1.2.0&quot;# extra classpath. e.g. set classpath for hive-site.xmlexport ZEPPELIN_INTP_CLASSPATH_OVERRIDES=/etc/hive/confFor Windows, ensure you have winutils.exe in %HADOOP_HOME%bin. Please see Problems running Hadoop on Windows for the details.2. Set master in Interpreter menuAfter start Zeppelin, go to Interpreter menu and edit master property in your Spark interpreter setting. The value may vary depending on your Spark cluster deployment type.For example,local[*] in local modespark://master:7077 in standalone clusteryarn-client in Yarn client modeyarn-cluster in Yarn cluster modemesos://host:5050 in Mesos clusterThat&#39;s it. Zeppelin will work with any version of Spark and any deployment type without rebuilding Z eppelin in this way.For the further information about Spark &amp; Zeppelin version compatibility, please refer to &quot;Available Interpreters&quot; section in Zeppelin download page.Note that without exporting SPARK_HOME, it&#39;s running in local mode with included version of Spark. The included version may vary depending on the build profile.3. 
Yarn modeZeppelin support both yarn client and yarn cluster mode (yarn cluster mode is supported from 0.8.0). For yarn mode, you must specify SPARK_HOME &amp; HADOOP_CONF_DIR.You can either specify them in zeppelin-env.sh, or in interpreter setting page. Specifying them in zeppelin-env.sh means you can use only one version of spark &amp; hadoop. Specifying themin interpreter setting page means you can use multiple versions of spark &amp; hadoop in one zeppelin instance.4. New Version of SparkInterpreterStarting from 0.9, we totally removed the old spark interpreter implementation, and make the new spark interpre ter as the official spark interpreter.SparkContext, SQLContext, SparkSession, ZeppelinContextSparkContext, SQLContext and ZeppelinContext are automatically created and exposed as variable names sc, sqlContext and z, respectively, in Scala, Python and R environments.Staring from 0.6.1 SparkSession is available as variable spark when you are using Spark 2.x.Note that Scala/Python/R environment shares the same SparkContext, SQLContext and ZeppelinContext instance. How to pass property to SparkConfThere&#39;re 2 kinds of properties that would be passed to SparkConfStandard spark property (prefix with spark.). e.g. spark.executor.memory will be passed to SparkConfNon-standard spark property (prefix with zeppelin.spark.). e.g. zeppelin.spark.property_1, property_1 will be passed to SparkConfDependency ManagementFor spark interpreter, you should not use Zeppelin&#39;s Dependency Management for managing third party dependencies, (%spark.dep also is not the recommended approach star ting from Zeppelin 0.8). Instead you should set spark properties (spark.jars, spark.files, spark.jars.packages) in 2 ways. spark-defaults.conf SPARK_SUBMIT_OPTIONS Description spark.jars --jars Comma-separated list of local jars to include on the driver and executor classpaths. spark.jars.packages --packages Comma-separated list of maven coordinates of jars to include on the driver and executor classpaths. Will search the local maven repo, then maven central and any additional remote repositories given by --repositories. The format for the coordinates should be groupId:artifactId:version. spark.files --files Comma-separated list of files to be placed in the working directory of each executor. 1. Set spark properties in zeppelin side.In zeppelin side, you can either set them in spark interpreter setting page or via Generic ConfInterpreter.It is not recommended to set them in SPARK_SUBMIT_OPTIONS. Because it will be shared by all spar k interpreters, you can not set different dependencies for different users.2. Set spark properties in spark side.In spark side, you can set them in spark-defaults.conf.e.g. spark.jars /path/mylib1.jar,/path/mylib2.jar spark.jars.packages com.databricks:spark-csv_2.10:1.2.0 spark.files /path/mylib1.py,/path/mylib2.egg,/path/mylib3.zipZeppelinContextZeppelin automatically injects ZeppelinContext as variable z in your Scala/Python environment. ZeppelinContext provides some additional functions and utilities.See Zeppelin-Context for more details.Matplotlib Integration (pyspark)Both the python and pyspark interpreters have built-in support for inline visualization using matplotlib,a popular plotting library for python. More details can be found in the python interpreter documentation,since matplotlib support is identical. 
More advanced interactive plotting can be done with pyspark throughutilizing Zeppelin&#39;s built-in Angular Display System, as shown below: Running spark sql concurrentlyBy default, each sql statement would run sequentially in %spark.sql. But you can run them concurrently by following setup.set zeppelin.spark.concurrentSQL to true to enable the sql concurrent feature, underneath zeppelin will change to use fairscheduler for spark. And also set zeppelin.spark.concurrentSQL.max to control the max number of sql statements running concurrently.configure pools by creating fairscheduler.xml under your SPARK_CONF_DIR, check the offical spark doc Configuring Pool Propertiesset pool property via setting paragraph property. e.g.%spark(pool=pool1)sql statementThis feature is available for both all versions of scala spark, pyspark. For sparkr, it is only available starting from 2.3.0.Interpreter setting optionYou can choose one of shared, scoped and isolated options wheh you configure Spark interpreter.Spark interpreter creates separated Scala compiler per each notebook but share a single SparkContext in scoped mode (experimental). It creates separated SparkContext per each notebook in isolated mode.IPython supportBy default, zeppelin would use IPython in pyspark when IPython is available, Otherwise it would fall back to the original PySpark implementation.If you don&#39;t want to use IPython, then you can set zeppelin.pyspark.useIPython as false in interpreter setting. For the IPython features, you can refer docPython InterpreterSetting up Zeppelin with KerberosLogical setup with Zeppelin, Kerberos Key Distribution Center (KDC), and Spark on YARN:Deprecate Spark 2.2 and earlier versionsStarting from 0.9, Zeppelin deprecate Spark 2.2 and earlier versions. So you will see a warning message when you use Spark 2.2 and earlier.You can get rid of this message by setting zeppelin.spark.deprecatedMsg.show to false.Configuration SetupOn the server that Zeppelin is installed, install Kerberos client modules and configuration, krb5.conf.This is to make the server communicate with KDC.Set SPARK_HOME in [ZEPPELIN_HOME ]/conf/zeppelin-env.sh to use spark-submit(Additionally, you might have to set export HADOOP_CONF_DIR=/etc/hadoop/conf)Add the two properties below to Spark configuration ([SPARK_HOME]/conf/spark-defaults.conf):spark.yarn.principalspark.yarn.keytabNOTE: If you do not have permission to access for the above spark-defaults.conf file, optionally, you can add the above lines to the Spark Interpreter setting through the Interpreter tab in the Zeppelin UI.That&#39;s it. Play with Zeppelin!", + "content" : "<!--Licensed under the Apache License, Version 2.0 (the "License");you may not use this file except in compliance with the License.You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.-->Spark Interpreter for Apache ZeppelinOverviewApache Spark is a fast and general-purpose cluster computing system.It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs.Apache Spark is supported in Zeppelin with Spark interpreter group which consists of below six interpreters. 
Name Class Description %spark SparkInterpreter Creates a SparkContext/SparkSession and provides a Scala environment %spark.pyspark PySparkInterpreter Provides a Python environment %spark.ipyspark IPySparkInterpreter Provides an IPython environment %spark.r SparkRInterpreter Provides an R environment with SparkR support %spark.sql SparkSQLInterpreter Provides a SQL environment %spark.kotlin KotlinSparkInterpreter Provides a Kotlin environment ConfigurationThe Spark interpreter can be configured with properties provided by Zeppelin.You can also set other Spark properties which are not listed in the table. For a list of additional properties, refer to Spark Available Properties. Property Default Description SPARK_HOME Location of spark distribution master local[*] Spark master uri. e.g. spark://masterhost:7077 spark.app.name Zeppelin The name of spark application. spark.driver.cores 1 Number of cores to use for the driver process, only in cluster mode. spark.driver.memory 1g Amount of memory to use for the driver process, i.e. where SparkContext is initialized, in the same format as JVM memory strings with a size unit suffix (&quot;k&quot;, &quot;m&quot;, &quot;g&quot; or &quot;t&quot;) (e.g. 512m, 2g). spark.executor.cores 1 The number of cores to use on each executor spark.executor.memory 1g Executor memory per worker instance. e.g. 512m, 32g spark.files Comma-separated list of files to be placed in the working directory of each executor. Globs are allowed. spark.jars Comma-separated list of jars to include on the driver and executor classpaths. Globs are allowed. spark.jars.packages Comma-separated list of Maven coordinates of jars to include on the driver and executor classpaths. The coordinates should be groupId:artifactId:version. If spark.jars.ivySettings is given artifacts will be resolved according to the configuration in the file, otherwise artifacts will be searched for in the local maven repo, then maven central and finally any additional remote repositories given by the command-line option --repositories. PYSPARK_PYTHON python Python binary executable to use for PySpark in both driver and executors (default is python). Property spark.pyspark.python takes precedence if it is set PYSPARK_DRIVER_PYTHON python Python binary executable to use for PySpark in driver only (default is PYSPARK_PYTHON). Property spark.pyspark.driver.python takes precedence if it is set zeppelin.pyspark.useIPython false Whether to use IPython when the ipython prerequisites are met in %spark.pyspark zeppelin.R.cmd R R binary executable path. zeppelin.spark.concurrentSQL false Execute multiple SQL concurrently if set true. zeppelin.spark.concurrentSQL.max 10 Max number of SQL concurrently executed zeppelin.spark.maxResult 1000 Max number of rows of Spark SQL result to display. zeppelin.spark.printREPLOutput true Print scala REPL output zeppelin.spark.useHiveContext true Use HiveContext instead of SQLContext if it is true. Enable hive for SparkSession zeppelin.spark.enableSupportedVersionCheck true Do not change - developer only setting, not for production use zeppelin.spark.sql.interpolation false Enable ZeppelinContext variable interpolation into spark sql zeppelin.spark.uiWebUrl Overrides Spark UI default URL. Value should be a full URL (ex: http://{hostName}/{uniquePath}) spark.webui.yarn.useProxy false Whether to use the yarn proxy url as the spark web url, e.g. http://localhost:8088/proxy/application1583396598068_0004 Without any configuration, Spark interpreter works out of the box in local mode.
But if you want to connect to your Spark cluster, you&#39;ll need to follow the two simple steps below.Export SPARK_HOMEThere are several options for setting SPARK_HOME.Set SPARK_HOME in zeppelin-env.shSet SPARK_HOME in Interpreter setting pageSet SPARK_HOME via inline generic configuration 1. Set SPARK_HOME in zeppelin-env.shIf you work with only one version of spark, then you can set SPARK_HOME in zeppelin-env.sh because any setting in zeppelin-env.sh is globally applied.e.g. export SPARK_HOME=/usr/lib/sparkYou can optionally set more environment variables in zeppelin-env.sh# set hadoop conf direxport HADOOP_CONF_DIR=/usr/lib/hadoop2. Set SPARK_HOME in Interpreter setting pageIf you want to use multiple versions of spark, then you need to create multiple spark interpreters and set SPARK_HOME for each of them. e.g.Create a new spark interpreter spark24 for spark 2.4 and set SPARK_HOME in interpreter setting pageCreate a new spark interpreter spark16 for spark 1.6 and set SPARK_HOME in interpreter setting page3. Set SPARK_HOME via inline generic configurationBesides setting SPARK_HOME in interpreter setting page, you can also use inline generic configuration to put the configuration with code together for more flexibility. e.g.Set master in Interpreter menuAfter starting Zeppelin, go to Interpreter menu and edit the master property in your Spark interpreter setting. The value may vary depending on your Spark cluster deployment type.For example,local[*] in local modespark://master:7077 in standalone clusteryarn-client in Yarn client modeyarn-cluster in Yarn cluster modemesos://host:5050 in Mesos clusterThat&#39;s it. Zeppelin will work with any version of Spark and any deployment type without rebuilding Zeppelin in this way.For further information about Spark &amp; Zeppelin version compatibility, please refer to the &quot;Available Interpreters&quot; section in the Zeppelin download page.Note that without exporting SPARK_HOME, it&#39;s running in local mode with the included version of Spark. The included version may vary depending on the build profile.SparkContext, SQLContext, SparkSession, ZeppelinContextSparkContext, SQLContext, SparkSession (for spark 2.x) and ZeppelinContext are automatically created and exposed as variable names sc, sqlContext, spark and z, respectively, in Scala, Kotlin, Python and R environments.Note that the Scala/Python/R environments share the same SparkContext, SQLContext, SparkSession and ZeppelinContext instance.YARN ModeZeppelin supports both yarn client and yarn cluster mode (yarn cluster mode is supported from 0.8.0). For yarn mode, you must specify SPARK_HOME &amp; HADOOP_CONF_DIR. Usually you only have one hadoop cluster, so you can set HADOOP_CONF_DIR in zeppelin-env.sh which is applied to all spark interpreters. If you want to use spark against multiple hadoop clusters, then you need to define HADOOP_CONF_DIR in interpreter setting or via inline generic configuration.Dependency ManagementFor spark interpreter, it is not recommended to use Zeppelin&#39;s Dependency Management for managing third party dependencies (%spark.dep is removed from Zeppelin 0.9 as well). Instead you should set the standard Spark properties. Spark Property Spark Submit Argument Description spark.files --files Comma-separated list of files to be placed in the working directory of each executor. Globs are allowed. spark.jars --jars Comma-separated list of jars to include on the driver and executor classpaths. Globs are allowed.
spark.jars.packages --packages Comma-separated list of Maven coordinates of jars to include on the driver and executor classpaths. The coordinates should be groupId:artifactId:version. If spark.jars.ivySettings is given artifacts will be resolved according to the configuration in the file, otherwise artifacts will be searched for in the local maven repo, then maven central and finally any additional remote repositories given by the command-line option --repositories. You can either set Spark properties in interpreter setting page or set Spark submit arguments in zeppelin-env.sh via environment variable SPARK_SUBMIT_OPTIONS. For example:export SPARK_SUBMIT_OPTIONS=&quot;--files &lt;my_file&gt; --jars &lt;my_jar&gt; --packages &lt;my_package&gt;&quot;But it is not recommended to set them in SPARK_SUBMIT_OPTIONS. Because it will be shared by all spark interpreters, which means you cannot set different dependencies for different users.PySparkThere&#39;re 2 ways to use PySpark in Zeppelin:Vanilla PySparkIPySparkVanilla PySpark (Not Recommended)Vanilla PySpark interpreter is almost the same as the vanilla Python interpreter except that Zeppelin injects SparkContext, SQLContext, SparkSession via variables sc, sqlContext, spark.By default, Zeppelin would use IPython in %spark.pyspark when IPython is available, otherwise it would fall back to the original PySpark implementation.If you don&#39;t want to use IPython, then you can set zeppelin.pyspark.useIPython as false in interpreter setting. For the IPython features, you can refer to doc Python InterpreterIPySpark (Recommended)You can use IPySpark explicitly via %spark.ipyspark. IPySpark interpreter is almost the same as the IPython interpreter except that Zeppelin injects SparkContext, SQLContext, SparkSession via variables sc, sqlContext, spark.For the IPython features, you can refer to doc Python InterpreterSparkRZeppelin supports SparkR via %spark.r. Here&#39;s the configuration for the SparkR Interpreter. Spark Property Default Description zeppelin.R.cmd R R binary executable path. zeppelin.R.knitr true Whether to use knitr or not. (It is recommended to install knitr and use it in Zeppelin) zeppelin.R.image.width 100% R plotting image width. zeppelin.R.render.options out.format = 'html', comment = NA, echo = FALSE, results = 'asis', message = F, warning = F, fig.retina = 2 R plotting options. SparkSqlSpark Sql Interpreter shares the same SparkContext/SparkSession with other Spark interpreters. That means any table registered in scala, python or r code can be accessed by Spark Sql.For example:%sparkcase class People(name: String, age: Int)var df = spark.createDataFrame(List(People(&quot;jeff&quot;, 23), People(&quot;andy&quot;, 20)))df.createOrReplaceTempView(&quot;people&quot;)%spark.sqlselect * from peopleBy default, each sql statement would run sequentially in %spark.sql. But you can run them concurrently with the following setup.Set zeppelin.spark.concurrentSQL to true to enable the sql concurrent feature, underneath zeppelin will change to use fairscheduler for spark. And also set zeppelin.spark.concurrentSQL.max to control the max number of sql statements running concurrently.Configure pools by creating fairscheduler.xml under your SPARK_CONF_DIR, check the official spark doc Configuring Pool PropertiesSet pool property via setting paragraph property. e.g.%spark(pool=pool1)sql statementThis pool feature is also available for all versions of scala Spark, PySpark.
For SparkR, it is only available starting from 2.3.0.Interpreter Setting OptionYou can choose one of shared, scoped and isolated options when you configure the Spark interpreter.e.g. In scoped per user mode, Zeppelin creates a separate Scala compiler for each user but shares a single SparkContext.In isolated per user mode, Zeppelin creates a separate SparkContext for each user.ZeppelinContextZeppelin automatically injects ZeppelinContext as variable z in your Scala/Python environment. ZeppelinContext provides some additional functions and utilities.See Zeppelin-Context for more details.User ImpersonationIn yarn mode, the user who launches the zeppelin server will be used to launch the spark yarn application. This is not a good practice.Most of the time, you will enable shiro in Zeppelin and would like to use the login user to submit the spark yarn app. For this purpose,you need to enable user impersonation for more security control. In order to enable user impersonation, you need to do the following stepsStep 1 Enable the user impersonation setting in hadoop&#39;s core-site.xml. E.g. if you are using user zeppelin to launch Zeppelin, then add the following to core-site.xml, then restart both hdfs and yarn. &lt;property&gt; &lt;name&gt;hadoop.proxyuser.zeppelin.groups&lt;/name&gt; &lt;value&gt;*&lt;/value&gt;&lt;/property&gt;&lt;property&gt; &lt;name&gt;hadoop.proxyuser.zeppelin.hosts&lt;/name&gt; &lt;value&gt;*&lt;/value&gt;&lt;/property&gt;Step 2 Enable interpreter user impersonation in the Spark interpreter&#39;s interpreter setting. (Enable shiro first of course)Step 3 (Optional) If you are using a kerberos cluster, then you need to set zeppelin.server.kerberos.keytab and zeppelin.server.kerberos.principal to the user (aka. the user in Step 1) you want to impersonate in zeppelin-site.xml.Setting up Zeppelin with KerberosLogical setup with Zeppelin, Kerberos Key Distribution Center (KDC), and Spark on YARN:Deprecate Spark 2.2 and earlier versionsStarting from 0.9, Zeppelin deprecates Spark 2.2 and earlier versions. So you will see a warning message when you use Spark 2.2 and earlier.You can get rid of this message by setting zeppelin.spark.deprecatedMsg.show to false.Configuration SetupOn the server where Zeppelin is installed, install Kerberos client modules and configuration, krb5.conf.This is to make the server communicate with KDC.Add the two properties below to Spark configuration ([SPARK_HOME]/conf/spark-defaults.conf):spark.yarn.principalspark.yarn.keytabNOTE: If you do not have permission to access the above spark-defaults.conf file, optionally, you can add the above lines to the Spark Interpreter setting through the Interpreter tab in the Zeppelin UI.That&#39;s it. Play with Zeppelin!", "url": " /interpreter/spark.html", "group": "interpreter", "excerpt": "Apache Spark is a fast and general-purpose cluster computing system. It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs."
} , - - + + "/interpreter/python.html": { "title": "Python 2 & 3 Interpreter for Apache Zeppelin", - "content" : "<!--Licensed under the Apache License, Version 2.0 (the "License");you may not use this file except in compliance with the License.You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.-->Python 2 &amp; 3 Interpreter for Apache ZeppelinConfiguration Property Default Description zeppelin.python python Path of the already installed Python binary (could be python2 or python3). If python is not in your $PATH you can set the absolute directory (example : /usr/bin/python) zeppelin.python.maxResult 1000 Max number of dataframe rows to display. Enabling Python InterpreterIn a notebook, to enable the Python interpreter, click on the Gear icon and select PythonUsing the Python InterpreterIn a paragraph, use %python to select the Python interpreter and then input all commands.The interpreter can only work if you already have python installed (the interpreter doesn&#39;t bring it own python binaries).To access the help, type help()Python environmentsDefaultBy default, PythonInterpreter will use python command defined in zeppelin.python property to run python process.The interpreter can use all modules already installed (with pip, easy_install...)CondaConda is an package management system and environment management system for python.%python.conda interpreter lets you change between environments.Usageget the Conda Infomation: %python.conda infolist the Conda environments: %python.conda env listcreate a conda enviornment: %python.conda create --name [ENV NAME]activate an environment (python interpreter will be restarted): %python.conda activate [ENV NAME]d eactivate%python.conda deactivateget installed package list inside the current environment%python.conda listinstall package%python.conda install [PACKAGE NAME]uninstall package%python.conda uninstall [PACKAGE NAME]Docker%python.docker interpreter allows PythonInterpreter creates python process in a specified docker container.Usageactivate an environment%python.docker activate [Repository]%python.docker activate [Repository:Tag]%python.docker activate [Image Id]deactivate%python.docker deactivateHere is an example# activate latest tensorflow image as a python environment%python.docker activate gcr.io/tensorflow/tensorflow:latestUsing Zeppelin Dynamic FormsYou can leverage Zeppelin Dynamic Form inside your Python code.Zeppelin Dynamic Form can only be used if py4j Python library is installed in your system. If not, you can install it with pip install py4j.Example : %python### Input formprint (z.input(&quot;f1&quot;,&quot;defaultValue&quot;))### Select formprint (z.sele ct(&quot;f1&quot;,[(&quot;o1&quot;,&quot;1&quot;),(&quot;o2&quot;,&quot;2&quot;)],&quot;2&quot;))### Checkbox formprint(&quot;&quot;.join(z.checkbox(&quot;f3&quot;, [(&quot;o1&quot;,&quot;1&quot;), (&quot;o2&quot;,&quot;2&quot;)],[&quot;1&quot;])))Matplotlib integrationThe python interpreter can display matplotlib figures inline automatically using the pyplot module:%pythonimport matplotlib.pyplot as pltplt.plot([1, 2, 3])This is the recommended method for using matplotlib from within a Zeppelin notebook. 
The output of this command will by default be converted to HTML by implicitly making use of the %html magic. Additional configuration can be achieved using the builtin z.configure_mpl() method. For example, z.configure_mpl(width=400, height=300, fmt=&#39;svg&#39;)plt.plot([1, 2, 3])Will produce a 400x300 image in SVG format, which by default are normally 600x400 and PNG r espectively. In the future, another option called angular can be used to make it possible to update a plot produced from one paragraph directly from another (the output will be %angular instead of %html). However, this feature is already available in the pyspark interpreter. More details can be found in the included &quot;Zeppelin Tutorial: Python - matplotlib basic&quot; tutorial notebook. If Zeppelin cannot find the matplotlib backend files (which should usually be found in $ZEPPELIN_HOME/interpreter/lib/python) in your PYTHONPATH, then the backend will automatically be set to agg, and the (otherwise deprecated) instructions below can be used for more limited inline plotting.If you are unable to load the inline backend, use z.show(plt):%pythonimport matplotlib.pyplot as pltplt.figure()(.. ..)z.show(plt)plt.close()The z.show() function can take optional parameters to adapt graph dimensions (width and height) as well as output format (png or optionally svg).%pythonz.show(plt , width=&#39;50px&#39;)z.show(plt, height=&#39;150px&#39;, fmt=&#39;svg&#39;)Pandas integrationApache Zeppelin Table Display System provides built-in data visualization capabilities. Python interpreter leverages it to visualize Pandas DataFrames though similar z.show() API, same as with Matplotlib integration.Example:import pandas as pdrates = pd.read_csv(&quot;bank.csv&quot;, sep=&quot;;&quot;)z.show(rates)SQL over Pandas DataFramesThere is a convenience %python.sql interpreter that matches Apache Spark experience in Zeppelin and enables usage of SQL language to query Pandas DataFrames and visualization of results though built-in Table Display System.Pre-requestsPandas pip install pandasPandaSQL pip install -U pandasqlIn case default binded interpreter is Python (first in the interpreter list, under the Gear Icon), you can just use it as %sql i.efirst paragraphimport pandas as pdrates = pd.read_csv(&quot;bank.csv&quot;, sep=&quot; ;&quot;)next paragraph%sqlSELECT * FROM rates WHERE age &lt; 40Otherwise it can be referred to as %python.sqlIPython SupportIPython is more powerful than the default python interpreter with extra functionality. You can use IPython with Python2 or Python3 which depends on which python you set zeppelin.python.Pre-requests- Jupyter `pip install jupyter`- grpcio `pip install grpcio`- protobuf `pip install protobuf`If you already install anaconda, then you just need to install grpcio as Jupyter is already included in anaconda. For grpcio version &gt;= 1.12.0 you&#39;ll also need to install protobuf separately.In addition to all basic functions of the python interpreter, you can use all the IPython advanced features as you use it in Jupyter Notebook.e.g. Use IPython magic%python.ipython#python helprange?#timeit%timeit range(100)Use matplotlib %python.ipython%matplotlib inlineimport matplotlib.pyplot as pltprint(&quot;hello world&quot;)data=[1,2,3,4]plt.figure()plt. plot(data)We also make ZeppelinContext available in IPython Interpreter. 
You can use ZeppelinContext to create dynamic forms and display pandas DataFrame.e.g.Create dynamic formz.input(name=&#39;my_name&#39;, defaultValue=&#39;hello&#39;)Show pandas dataframeimport pandas as pddf = pd.DataFrame({&#39;id&#39;:[1,2,3], &#39;name&#39;:[&#39;a&#39;,&#39;b&#39;,&#39;c&#39;]})z.show(df)By default, we would use IPython in %python.python if IPython is available. Otherwise it would fall back to the original Python implementation.If you don&#39;t want to use IPython, then you can set zeppelin.python.useIPython as false in interpreter setting.Technical descriptionFor in-depth technical details on current implementation please refer to python/README.md.Some features not yet implemented in the Python InterpreterInterrupt a paragraph execution (cancel() method) is currently only supported in Linux and MacOs. If the interpreter runs in another operating system (for instance MS Windows), interrupting a paragraph will close the whole interpreter. A JIRA ticket (ZEPPELIN-893) is opened to implement this feature in a next release of the interpreter.Progression bar in webUI (getProgress() method) is currently not implemented.Code-completion is currently not implemented.", + "content" : "<!--Licensed under the Apache License, Version 2.0 (the "License");you may not use this file except in compliance with the License.You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.-->Python 2 &amp; 3 Interpreter for Apache ZeppelinOverviewZeppelin supports the python language, which is very popular in data analytics and machine learning. Name Class Description %python PythonInterpreter Vanilla python interpreter, with the least dependencies; only a python environment installed is required %python.ipython IPythonInterpreter Provides a fancier python runtime via IPython, almost the same experience as Jupyter. It requires more things, but is the recommended interpreter for using python in Zeppelin, see below %python.sql PythonInterpreterPandasSql Provides sql capability to query data in Pandas DataFrame via pandasql Configuration Property Default Description zeppelin.python python Path of the installed Python binary (could be python2 or python3). You should set this property explicitly if python is not in your $PATH (example: /usr/bin/python). zeppelin.python.maxResult 1000 Max number of dataframe rows to display. zeppelin.python.useIPython true When this property is true, %python would be delegated to %python.ipython if IPython is available, otherwise IPython is only used in %python.ipython. Vanilla Python Interpreter (%python)The vanilla python interpreter provides the basic python interpreter features; only python installed is required.Matplotlib integrationThe vanilla python interpreter can display matplotlib figures inline automatically using matplotlib:%pythonimport matplotlib.pyplot as pltplt.plot([1, 2, 3])The output of this command will by default be converted to HTML by implicitly making use of the %html magic. Additional configuration can be achieved using the builtin z.configure_mpl() method.
For example, z.configure_mpl(width=400, height=300, fmt=&#39;svg&#39;)plt.plot([1, 2, 3])Will produce a 400x300 image in SVG format, which by default are normally 600x400 and PNG respectively. In the future, another option called angular can be used to make it possible to update a plot produced from one paragraph directly from another (the output will be %angular instead of %html). However, this feature is already available in the pyspark interpreter. More details can be found in the included &quot;Zeppelin Tutorial: Python - matplotlib basic&quot; tutorial notebook. If Zeppelin cannot find the matplotlib backend files (which should usually be found in $ZEPPELIN_HOME/interpreter/lib/python) in your PYTHONPATH, then the backend will automatically be set to agg, and the (otherwise deprecated) instructions below can be used for more limited inline plotting.If you are unable to load the inline backend, use z.show(plt):%pythonimport matplotlib.pyplot as pltplt.figure()(.. ..)z.show(plt)plt.close()The z.show() function can take optional parameters to adapt graph dimensions (width and height) as well as output format (png or optionally svg).%pythonz.show(plt, width=&#39;50px&#39;)z.show(plt, height=&#39;150px&#39;, fmt=&#39;svg&#39;)IPython Interpreter (%python.ipython) (recommended)IPython is more powerful than the vanilla python interpreter with extra functionality. You can use IPython with Python2 or Python3 which depends on which python you set in zeppelin.python.For non-anaconda environment Prerequisites- Jupyter `pip install jupyter`- grpcio `pip install grpcio`- protobuf `pip install protobuf`For anaconda environment (zeppelin.python points to the python under anaconda)Prerequisites- grpcio `pip install grpcio`- protobuf `pip install protobuf`In addition to all the basic functions of the vanilla python interpreter, you can use all the IPython advanced features as you use it in Jupyter Notebook.e.g. Use IPython magic%python.ipython#python helprange?#timeit%timeit range(100)Use matplotlib%python.ipython%matplotlib inlineimport matplotlib.pyplot as pltprint(&quot;hello world&quot;)data=[1,2,3,4]plt.figure()plt.plot(data)Colored text outputMore types of visualizatione.g. IPython supports hvplotBetter code completionBy default, Zeppelin would use IPython in %python if the IPython prerequisites are met, otherwise it would use the vanilla Python interpreter in %python.If you don&#39;t want to use IPython via %python, then you can set zeppelin.python.useIPython as false in interpreter setting.Pandas integrationApache Zeppelin Table Display System provides built-in data visualization capabilities.
Python interpreter leverages it to visualize Pandas DataFrames through a similar z.show() API, same as with Matplotlib integration.Example:%pythonimport pandas as pdrates = pd.read_csv(&quot;bank.csv&quot;, sep=&quot;;&quot;)z.show(rates)SQL over Pandas DataFramesThere is a convenience %python.sql interpreter that matches the Apache Spark experience in Zeppelin and enables usage of SQL language to query Pandas DataFrames and visualization of results through the built-in Table Display System.PrerequisitesPandas pip install pandasPandaSQL pip install -U pandasqlHere&#39;s one example:first paragraph%pythonimport pandas as pdrates = pd.read_csv(&quot;bank.csv&quot;, sep=&quot;;&quot;)next paragraph%python.sqlSELECT * FROM rates WHERE age &lt; 40Using Zeppelin Dynamic FormsYou can leverage Zeppelin Dynamic Form inside your Python code.Example: %python### Input formprint(z.input(&quot;f1&quot;,&quot;defaultValue&quot;))### Select formprint(z.select(&quot;f2&quot;,[(&quot;o1&quot;,&quot;1&quot;),(&quot;o2&quot;,&quot;2&quot;)],&quot;o1&quot;))### Checkbox formprint(&quot;&quot;.join(z.checkbox(&quot;f3&quot;, [(&quot;o1&quot;,&quot;1&quot;), (&quot;o2&quot;,&quot;2&quot;)],[&quot;o1&quot;])))ZeppelinContext APIPython interpreter creates a variable z which represents ZeppelinContext for you. Users can use it to do more fancy and complex things in Zeppelin. API Description z.put(key, value) Put object value with identifier key to distributed resource pool of Zeppelin, so that it can be used by other interpreters z.get(key) Get object with identifier key from distributed resource pool of Zeppelin z.remove(key) Remove object with identifier key from distributed resource pool of Zeppelin z.getAsDataFrame(key) Get object with identifier key from distributed resource pool of Zeppelin and convert it into a pandas dataframe. The object in the distributed resource pool must be table type, e.g. jdbc interpreter result. z.angular(name, noteId = None, paragraphId = None) Get the angular object with identifier name z.angularBind(name, value, noteId = None, paragraphId = None) Bind value to angular object with identifier name z.angularUnbind(name, noteId = None) Unbind value from angular object with identifier name z.show(p) Show python object p in Zeppelin, if it is a pandas dataframe, it would be displayed in Zeppelin's table format, others will be converted to string z.textbox(name, defaultValue="") Create dynamic form Textbox name with defaultValue z.select(name, options, defaultValue="") Create dynamic form Select name with options and defaultValue. options should be a list of Tuple(first element is key, the second element is the displayed value) e.g. z.select("f2",[("o1","1"),("o2","2")],"o1") z.checkbox(name, options, defaultChecked=[]) Create dynamic form Checkbox `name` with options and defaultChecked. options should be a list of Tuple(first element is key, the second element is the displayed value) e.g.
z.checkbox("f3", [("o1","1"), ("o2","2")],["o1"]) z.noteTextbox(name, defaultValue="") Create note level dynamic form Textbox z.noteSelect(name, options, defaultValue="") Create note level dynamic form Select z.noteCheckbox(name, options, defaultChecked=[]) Create note level dynamic form Checkbox z.run(paragraphId) Run paragraph z.run(noteId, paragraphId) Run paragraph z.runNote(noteId) Run the whole note Python environmentsDefaultBy default, PythonInterpreter will use python command defined in zeppeli n.python property to run python process.The interpreter can use all modules already installed (with pip, easy_install...)CondaConda is an package management system and environment management system for python.%python.conda interpreter lets you change between environments.Usageget the Conda Information: %python.conda infolist the Conda environments: %python.conda env listcreate a conda enviornment: %python.conda create --name [ENV NAME]activate an environment (python interpreter will be restarted): %python.conda activate [ENV NAME]deactivate%python.conda deactivateget installed package list inside the current environment%python.conda listinstall package%python.conda install [PACKAGE NAME]uninstall package%python.conda uninstall [PACKAGE NAME]Docker%python.docker interpreter allows PythonInterpreter creates python process in a specified docker container.Usageactivate an environment%python.docker activate [Repository]%python.docker activate [Repository:Tag]%python.docker activate [Imag e Id]deactivate%python.docker deactivateHere is an example# activate latest tensorflow image as a python environment%python.docker activate gcr.io/tensorflow/tensorflow:latestTechnical descriptionFor in-depth technical details on current implementation please refer to python/README.md.Some features not yet implemented in the vanilla Python interpreterInterrupt a paragraph execution (cancel() method) is currently only supported in Linux and MacOs. If interpreter runs in another operating system (for instance MS Windows) , interrupt a paragraph will close the whole interpreter. A JIRA ticket (ZEPPELIN-893) is opened to implement this feature in a next release of the interpreter.Progression bar in webUI (getProgress() method) is currently not implemented.", "url": " /interpreter/python.html", "group": "interpreter", "excerpt": "Python is a programming language that lets you work quickly and integrate systems more effectively." } , - - + + "/interpreter/hive.html": { "title": "Hive Interpreter for Apache Zeppelin", @@ -86,8 +108,8 @@ "excerpt": "Apache Hive data warehouse software facilitates querying and managing large datasets residing in distributed storage. Hive provides a mechanism to project structure onto this data and query the data using a SQL-like language called HiveQL. At the same time this..." } , - - + + "/interpreter/ignite.html": { "title": "Ignite Interpreter for Apache Zeppelin", @@ -97,8 +119,8 @@ "excerpt": "Apache Ignite in-memory Data Fabric is a high-performance, integrated and distributed in-memory platform for computing and transacting on large-scale data sets in real-time, orders of magnitude faster than possible with traditional disk-based or flash technologies." 
} , - - + + "/interpreter/groovy.html": { "title": "Apache Groovy Interpreter for Apache Zeppelin", @@ -108,8 +130,19 @@ "excerpt": "Apache Groovy is a powerful, optionally typed and dynamic language, with static-typing and static compilation capabilities, for the Java platform aimed at improving developer productivity thanks to a concise, familiar and easy to learn syntax." } , - - + + + + "/interpreter/jupyter.html": { + "title": "Jupyter Interpreter for Apache Zeppelin", + "content" : "<!--Licensed under the Apache License, Version 2.0 (the "License");you may not use this file except in compliance with the License.You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.-->Jupyter Interpreter for Apache ZeppelinOverviewProject Jupyter exists to develop open-source software, open-standards, and services for interactive computing across dozens of programming languages.Zeppelin&#39;s Jupyter interpreter is a bridge/adapter between the Zeppelin interpreter and a Jupyter kernel. You can use any jupyter kernel as long as you have installed the necessary dependencies.ConfigurationTo run any Jupyter kernel in Zeppelin you first need to install the following prerequisites:pip install jupyter-clientpip install grpciopip install protobufThen you need to install the jupyter kernel you want to use. In the following sections, we will talk about how to use the following 3 jupyter kernels in Zeppelin:ipythonirjuliaJupyter Python kernelIn order to use the Jupyter Python kernel in Zeppelin, you need to install ipykernel first. pip install ipykernelThen you can run python code in the Jupyter interpreter like the following. %jupyter(kernel=python)%matplotlib inlineimport matplotlib.pyplot as pltplt.plot([1, 2, 3])Jupyter R kernelIn order to use IRKernel, you need to first install the IRkernel package in R.install.packages(&#39;IRkernel&#39;)IRkernel::installspec() # to register the kernel in the current R installationThen you can run r code in the Jupyter interpreter like the following. %jupyter(kernel=ir)library(ggplot2)ggplot(mpg, aes(x = displ, y = hwy)) + geom_point()Jupyter Julia kernelIn order to use Julia in Zeppelin, you need to install IJulia firstusing PkgPkg.add(&quot;IJulia&quot;)Then you can run julia code in the Jupyter interpreter like the following. %jupyter(kernel=julia-1.3)using PkgPkg.add(&quot;Plots&quot;)using Plotsplotly() # Choose the Plotly.jl backend for web interactivityplot(rand(5,5),linewidth=2,title=&quot;My Plot&quot;)Pkg.add(&quot;PyPlot&quot;) # Install a different backendpyplot() # Switch to using the PyPlot.jl backendplot(rand(5,5),linewidth=2,title=&quot;My Plot&quot;)Use any other kernelFor any other jupyter kernel, you can follow the steps below to use it in Zeppelin.Install the specified jupyter kernel. You can find all the available jupyter kernels here Find its kernel name by running the following commandbashjupyter kernelspec listRun the kernel as follows%jupyter(kernel=kernel_name)code", + "url": " /interpreter/jupyter.html", + "group": "interpreter", + "excerpt": "Project Jupyter exists to develop open-source software, open-standards, and services for interactive computing across dozens of programming languages."
+ } + , + + "/interpreter/sap.html": { "title": "SAP BusinessObjects Interpreter for Apache Zeppelin", @@ -119,8 +152,8 @@ "excerpt": "SAP BusinessObjects BI platform can simplify the lives of business users and IT staff. SAP BusinessObjects is based on universes. The universe contains dual-semantic layer model. The users make queries upon universes. This interpreter is new interface for universes." } , - - + + "/interpreter/kylin.html": { "title": "Apache Kylin Interpreter for Apache Zeppelin", @@ -130,8 +163,8 @@ "excerpt": "Apache Kylin⢠is an open source Distributed Analytics Engine designed to provide SQL interface and multi-dimensional analysis (OLAP) on Hadoop supporting extremely large datasets, original contributed from eBay Inc. ." } , - - + + "/interpreter/hazelcastjet.html": { "title": "Hazelcast Jet interpreter in Apache Zeppelin", @@ -141,8 +174,8 @@ "excerpt": "Build and execture Hazelcast Jet computation jobs." } , - - + + "/interpreter/hdfs.html": { "title": "HDFS File System Interpreter for Apache Zeppelin", @@ -152,8 +185,8 @@ "excerpt": "Hadoop File System is a distributed, fault tolerant file system part of the hadoop project and is often used as storage for distributed processing engines like Hadoop MapReduce and Apache Spark or underlying file systems like Alluxio." } , - - + + "/interpreter/hbase.html": { "title": "HBase Shell Interpreter for Apache Zeppelin", @@ -163,8 +196,8 @@ "excerpt": "HBase Shell is a JRuby IRB client for Apache HBase. This interpreter provides all capabilities of Apache HBase shell within Apache Zeppelin." } , - - + + "/interpreter/beam.html": { "title": "Beam interpreter in Apache Zeppelin", @@ -174,8 +207,8 @@ "excerpt": "Apache Beam is an open source, unified programming model that you can use to create a data processing pipeline." } , - - + + "/interpreter/geode.html": { "title": "Geode/Gemfire OQL Interpreter for Apache Zeppelin", @@ -185,19 +218,19 @@ "excerpt": "Apache Geode (incubating) provides a database-like consistency model, reliable transaction processing and a shared-nothing architecture to maintain very low latency performance with high concurrency processing." 
} , - - + + "/interpreter/r.html": { "title": "R Interpreter for Apache Zeppelin", - "content" : "<!--Licensed under the Apache License, Version 2.0 (the "License");you may not use this file except in compliance with the License.You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.-->R Interpreter for Apache ZeppelinOverviewR is a free software environment for statistical computing and graphics.To run R code and visualize plots in Apache Zeppelin, you will need R on your master node (or your dev laptop).For Centos: yum install R R-devel libcurl-devel openssl-develFor Ubuntu: apt-get install r-baseValidate your installation with a simple R command:R -e &quot;print(1+1)&quot;To enjoy plots, install additi onal libraries with:devtools with R -e &quot;install.packages(&#39;devtools&#39;, repos = &#39;http://cran.us.r-project.org&#39;)&quot;knitr with R -e &quot;install.packages(&#39;knitr&#39;, repos = &#39;http://cran.us.r-project.org&#39;)&quot;ggplot2 withR -e &quot;install.packages(&#39;ggplot2&#39;, repos = &#39;http://cran.us.r-project.org&#39;)&quot;Other visualization libraries: R -e &quot;install.packages(c(&#39;devtools&#39;,&#39;mplot&#39;, &#39;googleVis&#39;), repos = &#39;http://cran.us.r-project.org&#39;); require(devtools); install_github(&#39;ramnathv/rCharts&#39;)&quot;We recommend you to also install the following optional R libraries for happy data analytics:glmnetpROCdata.tablecaretsqldfwordcloudConfigurationTo run Zeppelin with the R Interpreter, the SPARK_HOME environment variable must be set. The best way to do this is by editing conf/zeppelin- env.sh.If it is not set, the R Interpreter will not be able to interface with Spark.You should also copy conf/zeppelin-site.xml.template to conf/zeppelin-site.xml. That will ensure that Zeppelin sees the R Interpreter the first time it starts up.Using the R InterpreterBy default, the R Interpreter appears as two Zeppelin Interpreters, %r and %knitr.%r will behave like an ordinary REPL. You can execute commands as in the CLI. R base plotting is fully supportedIf you return a data.frame, Zeppelin will attempt to display it using Zeppelin&#39;s built-in visualizations.%knitr interfaces directly against knitr, with chunk options on the first line:The two interpreters share the same environment. If you define a variable from %r, it will be within-scope if you then make a call using knitr.Using SparkR &amp; Moving Between LanguagesIf SPARK_HOME is set, the SparkR package will be loaded automatically:The Spark Context and SQL Context are created and injected into the local envi ronment automatically as sc and sql.The same context are shared with the %spark, %sql and %pyspark interpreters:You can also make an ordinary R variable accessible in scala and Python:And vice versa:Caveats &amp; TroubleshootingAlmost all issues with the R interpreter turned out to be caused by an incorrectly set SPARK_HOME. The R interpreter must load a version of the SparkR package that matches the running version of Spark, and it does this by searching SPARK_HOME. If Zeppelin isn&#39;t configured to interface with Spark in SPARK_HOME, the R interpreter will not be able to connect to Spark.The knitr environment is persistent. 
If you run a chunk from Zeppelin that changes a variable, then run the same chunk again, the variable has already been changed. Use immutable variables.(Note that %spark.r and %r are two different ways of calling the same interpreter, as are %spark.knitr and %knitr. By default, Zeppelin puts the R interpreters in the %spark. Interpreter Group.Using the %r interpreter, if you return a data.frame, HTML, or an image, it will dominate the result. So if you execute three commands, and one is hist(), all you will see is the histogram, not the results of the other commands. This is a Zeppelin limitation.If you return a data.frame (for instance, from calling head()) from the %spark.r interpreter, it will be parsed by Zeppelin&#39;s built-in data visualization system. Why knitr Instead of rmarkdown? Why no htmlwidgets? In order to support htmlwidgets, which has indirect dependencies, rmarkdown uses pandoc, which requires writing to and reading from disc. This makes it many times slower than knitr, which can operate entirely in RAM.Why no ggvis or shiny? Supporting shiny would require integrating a reverse-proxy into Zeppelin, which is a task.Max OS X &amp; case-insensitive filesystem. If you try to install on a case-insensitive filesystem, which is the Mac OS X default, maven can unintentionally delete the install directo ry because r and R become the same subdirectory.Error unable to start device X11 with the repl interpreter. Check your shell login scripts to see if they are adjusting the DISPLAY environment variable. This is common on some operating systems as a workaround for ssh issues, but can interfere with R plotting.akka Library Version or TTransport errors. This can happen if you try to run Zeppelin with a SPARK_HOME that has a version of Spark other than the one specified with -Pspark-1.x when Zeppelin was compiled.", + "content" : "<!--Licensed under the Apache License, Version 2.0 (the "License");you may not use this file except in compliance with the License.You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.-->R Interpreter for Apache ZeppelinOverviewR is a free software environment for statistical computing and graphics.To run R code and visualize plots in Apache Zeppelin, you will need R on your master node (or your dev laptop).For Centos: yum install R R-devel libcurl-devel openssl-develFor Ubuntu: apt-get install r-baseValidate your installation with a simple R command:R -e &quot;print(1+1)&quot;To enjoy plots, install additi onal libraries with:devtools with R -e &quot;install.packages(&#39;devtools&#39;, repos = &#39;http://cran.us.r-project.org&#39;)&quot;knitr with R -e &quot;install.packages(&#39;knitr&#39;, repos = &#39;http://cran.us.r-project.org&#39;)&quot;ggplot2 withR -e &quot;install.packages(&#39;ggplot2&#39;, repos = &#39;http://cran.us.r-project.org&#39;)&quot;Other visualization libraries: R -e &quot;install.packages(c(&#39;devtools&#39;,&#39;mplot&#39;, &#39;googleVis&#39;), repos = &#39;http://cran.us.r-project.org&#39;); require(devtools); install_github(&#39;ramnathv/rCharts&#39;)&quot;We recommend you to also install the following optional R libraries for happy data analytics:glmnetpROCdata.tablecaretsqldfwordcloudSupported 
InterpretersZeppelin supports the R language in 3 interpreters Name Class Description %r.r RInterpreter Vanilla R interpreter, with the fewest dependencies; only an installed R environment is required. It is always recommended to use the fully qualified interpreter name %r.r, because %r is ambiguous: it could mean both %spark.r and %r.r %r.ir IRInterpreter Provides a richer R runtime via [IRKernel](https://github.com/IRkernel/IRkernel), almost the same experience as using R in Jupyter. It requires more things, but is the recommended interpreter for using R in Zeppelin. %r.shiny ShinyInterpreter Runs Shiny apps in Zeppelin If you want to use R with Spark, it is almost the same via %spark.r, %spark.ir &amp; %spark.shiny. You can refer to the Spark Interpreter docs for more details.Configuration Property Default Description zeppelin.R.cmd R Path of the installed R binary. You should set this property explicitly if R is not in your $PATH (example: /usr/bin/R). zeppelin.R.knitr true Whether to use knitr or not. It is recommended to install [knitr](https://yihui.org/knitr/) zeppelin.R.image.width 100% Image width of R plotting zeppelin.R.shiny.iframe_width 100% IFrame width of Shiny App zeppelin.R.shiny.iframe_height 500px IFrame height of Shiny App Using the R Interpreter (%r.r &amp; %r.ir)By default, the R Interpreter appears as two Zeppelin Interpreters, %r.r and %r.ir.%r.r behaves like an ordinary REPL and uses SparkR to communicate between the R process and the JVM process.%r.ir uses IRKernel underneath; it behaves like using IRKernel in a Jupyter notebook. R basic expressionR base plotting is fully supportedBesides R base plotting, you can use other visualization libraries, e.g. ggplot and googleVis Make Shiny App in ZeppelinShiny is an R package that makes it easy to build interactive web applications (apps) straight from R.For developing a Shiny App in Zeppelin, you need at least 3 paragraphs (server paragraph, ui paragraph and run type paragraph)Server type R shiny paragraph%r.shiny(type=server)# Define server logic to summarize and view selected dataset ----server &lt;- function(input, output) { # Return the requested dataset ---- datasetInput &lt;- reactive({ switch(input$dataset, &quot;rock&quot; = rock, &quot;pressure&quot; = pressure, &quot;cars&quot; = cars) }) # Generate a summary of the dataset ---- output$summary &lt;- renderPrint({ dataset &lt;- datasetInput() summary(dataset) }) # Show the first &quot;n&quot; observations ---- output$view &lt;- renderTable({ head(datasetInput(), n = input$obs) })}UI type R shiny paragraph%r.shiny(type=ui)# Define UI for dataset viewer app ----ui &lt;- fluidPage( # App title ---- titlePanel(&quot;Shiny Text&quot;), # Sidebar layout with input and output definitions ---- sidebarLayout( # Sidebar panel for inputs ---- sidebarPanel( # Input: Selector for choosing dataset ---- selectInput(inputId = &quot;dataset&quot;, label = &quot;Choose a dataset:&quot;, choices = c(&quot;rock&quot;, &quot;pressure&quot;, &quot;cars&quot;)), # Input: Numeric entry for number of obs to view ---- numericInput(inputId = &quot;obs&quot;, label = &quot;Number of observations to view:&quot;, value = 10) ), # Main panel for displaying outputs ---- mainPanel( # Output: Verbatim text for data summary ---- verbatimTextOutput(&quot;summary&quot;), # Output: HTML table with requested number of observations ---- tableOutput(&quot;view&quot;) ) ))Run type R shiny paragraph%r.shiny(type=run)After executing the run type R shiny paragraph, the shiny app will be launched and embedded as an Iframe
in the paragraph.Run multiple shiny appsIf you want to run multiple shiny apps, you can specify app in the paragraph local property to differentiate them, e.g.%r.shiny(type=ui, app=app_1)%r.shiny(type=server, app=app_1)%r.shiny(type=run, app=app_1)", "url": " /interpreter/r.html", "group": "interpreter", "excerpt": "R is a free software environment for statistical computing and graphics." } , - - + + "/interpreter/java.html": { "title": "Java interpreter in Apache Zeppelin", @@ -207,19 +240,19 @@ "excerpt": "Run Java code and any distributed java computation library by importing the dependencies in the interpreter configuration." } , - - + + "/interpreter/flink.html": { "title": "Flink Interpreter for Apache Zeppelin", - "content" : "<!--Licensed under the Apache License, Version 2.0 (the "License");you may not use this file except in compliance with the License.You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.-->Flink interpreter for Apache ZeppelinOverviewApache Flink is an open source platform for distributed stream and batch data processing. Flink’s core is a streaming dataflow engine that provides data distribution, communication, and fault tolerance for distributed computations over data streams. Flink also builds batch processing on top of the streaming engine, overlaying native iteration support, managed memory, and program optimization.Apache Flink is supported in Zeppelin with Flink interpreter group which consists of below five interpreters. Name Class Description %flink FlinkInterpreter Creates ExecutionEnvironment/StreamExecutionEnvironment/BatchTableEnvironment/StreamTableEnvironment and provides a Scala environment %flink.pyflink PyFlinkInterpreter Provides a python environment %flink.ipyflink IPyFlinkInterpreter Provides an ipython environment %flink.ssql FlinkStreamSqlInterpreter Provides a stream sql environment %flink.bsql FlinkBatchSqlInterpreter Provides a batch sql environment ConfigurationThe Flink interpreter can be configured with properties provided by Zeppelin.You can also set other flink properties which are not listed in the table. For a list of additional properties, refer to Flink Available Properties. Property Default Description FLINK_HOME Location of flink installation. It must be specified, otherwise you can not use flink in zeppelin flink.execution.mode local Execution mode of flink, e.g.
local/yarn/remote flink.execution.remote.host jobmanager hostname if it is remote mode flink.execution.remote.port jobmanager port if it is remote mode flink.jm.memory 1024 Total number of memory(mb) of JobManager flink.tm.memory 1024 Total number of memory(mb) of TaskManager flink.tm.num 2 Number of TaskManager flink.tm.slot 1 Number of slot per TaskManager flink.yarn.appName Zeppelin Flink Session Yarn app name flink.yarn.queue queue name of yarn app flink.yarn.jars additional user jars (comma separated) zeppelin.flink.scala.color true whether display scala shell output in colorful format zeppelin.flink.enableHive false whether enable hive zeppelin.flink.printREPLOutput true Print REPL output zeppelin.flink.maxResult 1000 max number of row returned by sql interpreter zeppelin.flink.planner blink planner or flink table api, blink or flink zeppelin.pyflink.python python python executable for pyflink StreamExecutionEnvironment, ExecutionEnvironment, StreamTableEnvironment, BatchTableEnvironmentZeppelin will create 4 variables to represent flink&#39;s entrypoint:* senv (StreamExecutionEnvironment), * env (ExecutionEnvironment)* stenv (StreamTableEnvironment) * btenv (BatchTableEnvironment)ZeppelinContextZeppelin automatically injects ZeppelinContext as variable z in your Scala/Python environment. ZeppelinContext provides some additional functions and utilities.See Zeppelin-Context for more details.IPython supportBy default, zeppelin would use IPython in pyflink when IPython is available, Otherwise it would fall back to the original PyFlink implementation.If you don&#39;t want to use IPython, then you can set zeppelin.pyflink.useIPython as false in interpreter setting. For the IPython features, you can refer docPython Interpreter", +      "content" : "<!--Licensed under the Apache License, Version 2.0 (the "License");you may not use this file except in compliance with the License.You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.-->Flink interpreter for Apache ZeppelinOverviewApache Flink is an open source platform for distributed stream and batch data processing. Flink’s core is a streaming dataflow engine that provides data distribution, communication, and fault tolerance for distributed computations over data streams. Flink also builds batch processing on top of the streaming engine, overlaying native iteration support, managed memory, and program optimization.In Zeppelin 0.9, the Flink interpreter has been refactored to support the latest version of Flink; only Flink 1.10+ is supported, and older versions of Flink may not work.Apache Flink is supported in Zeppelin with the Flink interpreter group, which consists of the below five interpreters.
Name Class Description %flink FlinkInterpreter Creates ExecutionEnvironment/StreamExecutionEnvironment/BatchTableEnvironment/StreamTableEnvironment and provides a Scala environment %flink.pyflink PyFlinkInterpreter Provides a python environment %flink.ipyflink IPyFlinkInterpreter Provides an ipython environment %flink.ssql FlinkStreamSqlInterpreter Provides a stream sql environment %flink.bsql FlinkBatchSqlInterpreter Provides a batch sql environment PrerequisitesDownload Flink 1.10 for scala 2.11 (Only scala-2.11 is supported, scala-2.12 is not supported yet in Zeppelin)Download flink-hadoop-shaded and put it under the lib folder of flink (the flink interpreter needs that to support yarn mode)ConfigurationThe Flink interpreter can be configured with properties provided by Zeppelin (see the following table).You can also set other flink properties which are not listed in the table. For a list of additional properties, refer to Flink Available Properties. Property Default Description FLINK_HOME Location of flink installation. It must be specified, otherwise you cannot use flink in Zeppelin HADOOP_CONF_DIR Location of hadoop conf, this must be set if running in yarn mode HIVE_CONF_DIR Location of hive conf, this must be set if you want to connect to the hive metastore flink.execution.mode local Execution mode of flink, e.g. local | yarn | remote flink.execution.remote.host Host name of running JobManager. Only used for remote mode flink.execution.remote.port Port of running JobManager. Only used for remote mode flink.jm.memory 1024 Total memory (MB) of JobManager flink.tm.memory 1024 Total memory (MB) of TaskManager flink.tm.slot 1 Number of slots per TaskManager local.number-taskmanager 4 Total number of TaskManagers in local mode flink.yarn.appName Zeppelin Flink Session Yarn app name flink.yarn.queue queue name of yarn app flink.webui.yarn.useProxy false whether to use the yarn proxy url as the flink web url, e.g. http://localhost:8088/proxy/application15833965980680004 flink.udf.jars Flink udf jars (comma separated); zeppelin will register the udfs in these jars automatically for the user. The udf name is the class name. flink.execution.jars Additional user jars (comma separated) flink.execution.packages Additional user packages (comma separated), e.g.
org.apache.flink:flink-connector-kafka_2.11:1.10,org.apache.flink:flink-connector-kafka-base_2.11:1.10.0,org.apache.flink:flink-json:1.10.0 zeppelin.flink.concurrentBatchSql.max 10 Max concurrent sql statements of Batch Sql (%flink.bsql) zeppelin.flink.concurrentStreamSql.max 10 Max concurrent sql statements of Stream Sql (%flink.ssql) zeppelin.pyflink.python python Python binary executable for PyFlink table.exec.resource.default-parallelism 1 Default parallelism for flink sql jobs zeppelin.flink.scala.color true Whether to display scala shell output in colorful format zeppelin.flink.enableHive false Whether to enable hive zeppelin.flink.hive.version 2.3.4 Hive version that you would like to connect to zeppelin.flink.maxResult 1000 Max number of rows returned by the sql interpreter flink.interpreter.close.shutdown_cluster true Whether to shut down the application when closing the interpreter zeppelin.interpreter.close.cancel_job true Whether to cancel the flink job when closing the interpreter StreamExecutionEnvironment, ExecutionEnvironment, StreamTableEnvironment, BatchTableEnvironmentZeppelin will create 6 variables as flink scala (%flink) entry point:senv (StreamExecutionEnvironment), benv (ExecutionEnvironment)stenv (StreamTableEnvironment for blink planner) btenv (BatchTableEnvironment for blink planner)stenv_2 (StreamTableEnvironment for flink planner) btenv_2 (BatchTableEnvironment for flink planner)And will create 6 variables as pyflink (%flink.pyflink or %flink.ipyflink) entry point:s_env (StreamExecutionEnvironment), b_env (ExecutionEnvironment)st_env (StreamTableEnvironment for blink planner) bt_env (BatchTableEnvironment for blink planner)st_env_2 (StreamTableEnvironment for flink planner) bt_env_2 (BatchTableEnvironment for flink planner)Execution mode (Local/Remote/Yarn)Flink in Zeppelin supports 3 execution modes (flink.execution.mode):LocalRemoteYarnRun Flink in Local ModeRunning Flink in Local mode will start a MiniCluster in the local JVM. By default, the local MiniCluster will use port 8081, so make sure this port is available on your machine, otherwise you can configure rest.port to specify another port. You can also specify local.number-taskmanager and flink.tm.slot to customize the number of TMs and the number of slots per TM, because by default it is only 4 TMs with 1 slot each, which may not be enough for some cases.Run Flink in Remote ModeRunning Flink in remote mode will connect to an existing flink cluster, which could be a standalone cluster or a yarn session cluster. Besides specifying flink.execution.mode to be remote, you also need to specify flink.execution.remote.host and flink.execution.remote.port to point to the flink job manager.Run Flink in Yarn ModeIn order to run flink in Yarn mode, you need to make the following settings:Set flink.execution.mode to yarnSet HADOOP_CONF_DIR in flink&#39;s interpreter setting.Make sure the hadoop command is in your PATH, because internally flink will call the command hadoop classpath and load all the hadoop-related jars in the flink interpreter processBlink/Flink PlannerThere&#39;re 2 planners supported by Flink&#39;s table api: flink &amp; blink.If you want to use the DataSet api and convert it to a flink table, then please use the flink planner (btenv_2 and stenv_2).In other cases, we would always recommend you to use the blink planner.
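As a concrete illustration of the flink-planner path just described, the following sketch is not taken from the original page; the sample data and the field names id and word are illustrative assumptions. It shows a %flink scala paragraph that builds a DataSet with benv and converts it to a Table through btenv_2 (the flink-planner BatchTableEnvironment), since the blink planner cannot consume the DataSet API:

%flink
// Illustrative sketch: DataSet -> Table conversion needs the flink planner (btenv_2).
import org.apache.flink.api.scala._
import org.apache.flink.table.api.scala._

val ds  = benv.fromElements((1, "hello"), (2, "world"))  // DataSet[(Int, String)] built via benv
val tbl = btenv_2.fromDataSet(ds, 'id, 'word)            // DataSet -> Table under the flink planner
tbl.toDataSet[(Int, String)].print()                     // back to a DataSet and print locally
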
The blink planner is also what the flink batch/streaming sql interpreters use (%flink.bsql &amp; %flink.ssql)How to use HiveIn order to use Hive in Flink, you have to make the following settings:Set zeppelin.flink.enableHive to be trueSet zeppelin.flink.hive.version to be the hive version you are using.Set HIVE_CONF_DIR to be the location where hive-site.xml is located. Make sure the hive metastore is started and you have configured hive.metastore.uris in hive-site.xmlCopy the following dependencies to the lib folder of the flink installation. flink-connector-hive_2.11-1.10.0.jarflink-hadoop-compatibility_2.11-1.10.0.jarhive-exec-2.x.jar (for hive 1.x, you need to copy hive-exec-1.x.jar, hive-metastore-1.x.jar, libfb303-0.9.2.jar and libthrift-0.9.2.jar)After these settings, you will be able to query hive tables via either the table api %flink or batch sql %flink.bsqlFlink Batch SQL%flink.bsql is used for flink&#39;s batch sql. You just type help to get all the available commands.Use insert into statements for batch ETLUse select statements for exploratory data analytics Flink Streaming SQL%flink.ssql is used for flink&#39;s streaming sql. You just type help to get all the available commands. Mainly there&#39;re 2 cases:Use insert into statements for streaming processingUse select statements for streaming data analyticsFlink UDFYou can use Flink scala UDFs or Python UDFs in sql. UDFs for batch and streaming sql are the same. Here are 2 examples.Scala UDF%flinkclass ScalaUpper extends ScalarFunction { def eval(str: String) = str.toUpperCase}btenv.registerFunction(&quot;scala_upper&quot;, new ScalaUpper())Python UDF%flink.pyflinkclass PythonUpper(ScalarFunction): def eval(self, s): return s.upper()bt_env.register_function(&quot;python_upper&quot;, udf(PythonUpper(), DataTypes.STRING(), DataTypes.STRING()))Besides defining udfs in Zeppelin, you can also load udfs in jars via flink.udf.jars. For example, you can create udfs in intellij and then build these udfs into one jar. After that you can point flink.udf.jars to this jar, and the flink interpreter will detect all the udfs in this jar and register them to the TableEnvironment; the udf name is the class name.ZeppelinContextZeppelin automatically injects ZeppelinContext as variable z in your Scala/Python environment. ZeppelinContext provides some additional functions and utilities.See Zeppelin-Context for more details.IPython SupportBy default, zeppelin would use IPython in %flink.pyflink when IPython is available, otherwise it would fall back to the original python implementation.For the IPython features, you can refer to the doc Python InterpreterTutorial NotesZeppelin is shipped with several Flink tutorial notes which may be helpful for you. Apart from the first one, the below 4 notes cover the 4 main scenarios of flink.Flink BasicBatch ETLExploratory Data AnalyticsStreaming ETLStreaming Data Analytics", "url": " /interpreter/flink.html", "group": "interpreter", "excerpt": "Apache Flink is an open source platform for distributed stream and batch data processing." } , - - + + "/interpreter/postgresql.html": { "title": "PostgreSQL, Apache HAWQ (incubating) Interpreter for Apache Zeppelin", @@ -229,8 +262,19 @@ "excerpt": "Apache Zeppelin supports PostgreSQL, Apache HAWQ (incubating) and Greenplum SQL data processing engines."
} , - - + + + + "/interpreter/sparql.html": { + "title": "SPARQL Interpreter for Apache Zeppelin", + "content" : "<!--Licensed under the Apache License, Version 2.0 (the "License");you may not use this file except in compliance with the License.You may obtain a copy of the License athttp://www.apache.org/licenses/LICENSE-2.0Unless required by applicable law or agreed to in writing, softwaredistributed under the License is distributed on an "AS IS" BASIS,WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.See the License for the specific language governing permissions andlimitations under the License.-->SPARQL Interpreter for Apache ZeppelinOverviewSPARQL is an RDF query language able to retrieve and manipulate data stored in Resource Description Framework (RDF) format.Apache Zeppelin for now only supports Apache Jena to query SPARQL-Endpoints.To query your endpoint configure it in the Interpreter-Settings and use the %sparql interpreter.Then write your query in the paragraph.If you want the prefixes to replace the URI&#39;s, set the replaceURIs setting.Configuration Name Default Value Description sparql.engine jena The sparql engine to use for the queries sparql.endpoint http://dbpedia.org/sparql Complete URL of the endpoint sparql.replaceURIs true Replace the URIs in the result with the prefixes sparql.removeDatatypes true Remove the datatypes from Literals so Zeppelin can use the values ExampleAcknowledgementThis work was partially supported by the Bavarian State Ministry of Economic Affairs,Regional Development and Energy within the framework of the Bavarian Research andDevelopment Program &quot;Information and Communication Technology&quot;.", + "url": " /interpreter/sparql.html", + "group": "interpreter", + "excerpt": "SPARQL is an RDF query language able to retrieve and manipulate data stored in Resource Description Framework (RDF) format. Apache Zeppelin uses Apache Jena" + } + , + + "/interpreter/cassandra.html": { "title": "Cassandra CQL Interpreter for Apache Zeppelin", @@ -240,8 +284,8 @@ "excerpt": "Apache Cassandra database is the right choice when you need scalability and high availability without compromising performance." } , - - + + "/interpreter/lens.html": { "title": "Lens Interpreter for Apache Zeppelin", @@ -251,8 +295,8 @@ "excerpt": "Apache Lens provides an Unified Analytics interface. Lens aims to cut the Data Analytics silos by providing a single view of data across multiple tiered data stores and optimal execution environment for the analytical query. It seamlessly integrates Hadoop with..." } , - - + + "/interpreter/elasticsearch.html": { "title": "Elasticsearch Interpreter for Apache Zeppelin", @@ -262,8 +306,8 @@ "excerpt": "Elasticsearch is a highly scalable open-source full-text search and analytics engine." } , - - + + "/interpreter/jdbc.html": { "title": "Generic JDBC Interpreter for Apache Zeppelin", @@ -273,8 +317,8 @@ "excerpt": "Generic JDBC Interpreter lets you create a JDBC connection to any data source. You can use Postgres, MySql, MariaDB, Redshift, Apache Hive, Apache Phoenix, Apache Drill and Apache Tajo using JDBC interpreter." } , - - + + "/interpreter/neo4j.html": { "title": "Neo4j Interpreter for Apache Zeppelin", @@ -284,8 +328,8 @@ "excerpt": "Neo4j is a native graph database, designed to store and process graphs from bottom to top." } , - - + + "/interpreter/bigquery.html": { "title": "BigQuery Interpreter for Apache Zeppelin", @@ -295,8 +339,8 @@
[... 793 lines stripped ...]
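The sparql.html entry added above documents the %sparql interpreter and its endpoint configuration. A minimal %sparql paragraph against the default sparql.endpoint (http://dbpedia.org/sparql) might look like the following sketch; the dbo: prefix, the birthPlace property, and the LIMIT are illustrative assumptions rather than content from the original page:

%sparql
PREFIX dbo: <http://dbpedia.org/ontology/>
SELECT ?person ?birthPlace
WHERE {
  ?person dbo:birthPlace ?birthPlace .
}
LIMIT 10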