ZEPPELIN-3111. Refactor SparkInterpreter ### What is this PR for? This PR refactors SparkInterpreter. See the design doc: https://docs.google.com/document/d/1AfGg3aGXonDyri1jrP4MMFT4Y4j3wpN1t8kL-GAKSUc/edit?usp=sharing
### What type of PR is it? [Refactoring] ### Todos * [ ] - Task ### What is the Jira issue? * https://issues.apache.org/jira/browse/ZEPPELIN-3111 ### How should this be tested? * Unit test is added. ### Screenshots (if appropriate) ### Questions: * Does the licenses files need update? No * Is there breaking changes for older versions? No * Does this needs documentation? No Author: Jeff Zhang <zjf...@apache.org> Closes #2709 from zjffdu/ZEPPELIN-3111 and squashes the following commits: aae4b09 [Jeff Zhang] ZEPPELIN-3111. Refactor SparkInterpreter Project: http://git-wip-us.apache.org/repos/asf/zeppelin/repo Commit: http://git-wip-us.apache.org/repos/asf/zeppelin/commit/d762b528 Tree: http://git-wip-us.apache.org/repos/asf/zeppelin/tree/d762b528 Diff: http://git-wip-us.apache.org/repos/asf/zeppelin/diff/d762b528 Branch: refs/heads/master Commit: d762b5288536201d8a2964891c556efaa1bae867 Parents: 6664412 Author: Jeff Zhang <zjf...@apache.org> Authored: Mon Jul 17 13:02:09 2017 +0800 Committer: Jeff Zhang <zjf...@apache.org> Committed: Fri Feb 2 14:00:35 2018 +0800 ---------------------------------------------------------------------- .travis.yml | 32 +- bin/interpreter.sh | 2 +- docs/interpreter/spark.md | 4 + pom.xml | 19 +- python/pom.xml | 41 +- .../zeppelin/python/IPythonInterpreter.java | 6 +- .../zeppelin/python/PythonInterpreter.java | 8 +- .../zeppelin/python/IPythonInterpreterTest.java | 14 +- .../python/PythonInterpreterMatplotlibTest.java | 2 +- .../zeppelin/python/PythonInterpreterTest.java | 2 +- r/pom.xml | 7 - spark-dependencies/pom.xml | 1042 ------------ spark/interpreter/figure/null-1.png | Bin 0 -> 13599 bytes spark/interpreter/pom.xml | 573 +++++++ .../spark/AbstractSparkInterpreter.java | 57 + .../apache/zeppelin/spark/DepInterpreter.java | 363 +++++ .../zeppelin/spark/IPySparkInterpreter.java | 128 ++ .../zeppelin/spark/NewSparkInterpreter.java | 390 +++++ .../zeppelin/spark/OldSparkInterpreter.java | 1525 ++++++++++++++++++ 
.../zeppelin/spark/PySparkInterpreter.java | 751 +++++++++ .../org/apache/zeppelin/spark/PythonUtils.java | 96 ++ .../apache/zeppelin/spark/SparkInterpreter.java | 163 ++ .../zeppelin/spark/SparkRInterpreter.java | 250 +++ .../zeppelin/spark/SparkSqlInterpreter.java | 187 +++ .../org/apache/zeppelin/spark/SparkVersion.java | 130 ++ .../zeppelin/spark/SparkZeppelinContext.java | 312 ++++ .../java/org/apache/zeppelin/spark/Utils.java | 177 ++ .../org/apache/zeppelin/spark/ZeppelinR.java | 394 +++++ .../apache/zeppelin/spark/ZeppelinRContext.java | 69 + .../spark/dep/SparkDependencyContext.java | 181 +++ .../spark/dep/SparkDependencyResolver.java | 351 ++++ .../src/main/resources/R/zeppelin_sparkr.R | 105 ++ .../src/main/resources/interpreter-setting.json | 233 +++ .../main/resources/python/zeppelin_ipyspark.py | 53 + .../main/resources/python/zeppelin_pyspark.py | 393 +++++ .../scala/org/apache/spark/SparkRBackend.scala | 54 + .../zeppelin/spark/ZeppelinRDisplay.scala | 117 ++ .../zeppelin/spark/utils/DisplayUtils.scala | 90 ++ .../zeppelin/spark/DepInterpreterTest.java | 94 ++ .../zeppelin/spark/IPySparkInterpreterTest.java | 204 +++ .../zeppelin/spark/NewSparkInterpreterTest.java | 389 +++++ .../spark/NewSparkSqlInterpreterTest.java | 173 ++ .../zeppelin/spark/OldSparkInterpreterTest.java | 368 +++++ .../spark/OldSparkSqlInterpreterTest.java | 189 +++ .../spark/PySparkInterpreterMatplotlibTest.java | 250 +++ .../zeppelin/spark/PySparkInterpreterTest.java | 193 +++ .../zeppelin/spark/SparkRInterpreterTest.java | 99 ++ .../apache/zeppelin/spark/SparkVersionTest.java | 71 + .../src/test/resources/log4j.properties | 52 + .../spark/utils/DisplayFunctionsTest.scala | 173 ++ spark/pom.xml | 871 +++------- spark/scala-2.10/pom.xml | 41 + spark/scala-2.10/spark-scala-parent | 1 + .../spark/SparkScala210Interpreter.scala | 141 ++ spark/scala-2.11/pom.xml | 41 + spark/scala-2.11/spark-scala-parent | 1 + .../src/main/resources/log4j.properties | 50 + 
.../spark/SparkScala211Interpreter.scala | 140 ++ spark/spark-dependencies/pom.xml | 591 +++++++ spark/spark-scala-parent/pom.xml | 172 ++ .../spark/BaseSparkScalaInterpreter.scala | 338 ++++ .../apache/zeppelin/spark/DepInterpreter.java | 363 ----- .../zeppelin/spark/IPySparkInterpreter.java | 128 -- .../zeppelin/spark/PySparkInterpreter.java | 745 --------- .../org/apache/zeppelin/spark/PythonUtils.java | 96 -- .../apache/zeppelin/spark/SparkInterpreter.java | 1525 ------------------ .../zeppelin/spark/SparkRInterpreter.java | 250 --- .../zeppelin/spark/SparkSqlInterpreter.java | 187 --- .../org/apache/zeppelin/spark/SparkVersion.java | 130 -- .../zeppelin/spark/SparkZeppelinContext.java | 314 ---- .../java/org/apache/zeppelin/spark/Utils.java | 177 -- .../org/apache/zeppelin/spark/ZeppelinR.java | 394 ----- .../apache/zeppelin/spark/ZeppelinRContext.java | 69 - .../spark/dep/SparkDependencyContext.java | 181 --- .../spark/dep/SparkDependencyResolver.java | 351 ---- spark/src/main/resources/R/zeppelin_sparkr.R | 105 -- .../src/main/resources/interpreter-setting.json | 226 --- .../main/resources/python/zeppelin_ipyspark.py | 53 - .../main/resources/python/zeppelin_pyspark.py | 393 ----- .../scala/org/apache/spark/SparkRBackend.scala | 54 - .../zeppelin/spark/ZeppelinRDisplay.scala | 117 -- .../zeppelin/spark/utils/DisplayUtils.scala | 90 -- .../zeppelin/spark/DepInterpreterTest.java | 94 -- .../zeppelin/spark/IPySparkInterpreterTest.java | 206 --- .../spark/PySparkInterpreterMatplotlibTest.java | 241 --- .../zeppelin/spark/PySparkInterpreterTest.java | 194 --- .../zeppelin/spark/SparkInterpreterTest.java | 355 ---- .../zeppelin/spark/SparkSqlInterpreterTest.java | 180 --- .../apache/zeppelin/spark/SparkVersionTest.java | 71 - .../spark/dep/SparkDependencyResolverTest.java | 51 - spark/src/test/resources/log4j.properties | 49 - .../spark/utils/DisplayFunctionsTest.scala | 173 -- testing/install_external_dependencies.sh | 4 +- zeppelin-display/pom.xml | 12 +- 
.../zeppelin/integration/SparkParagraphIT.java | 2 +- .../interpreter/BaseZeppelinContext.java | 2 + .../remote/RemoteInterpreterServer.java | 9 +- zeppelin-server/pom.xml | 6 + .../zeppelin/rest/AbstractTestRestApi.java | 15 +- .../zeppelin/rest/ZeppelinSparkClusterTest.java | 5 +- zeppelin-zengine/pom.xml | 2 +- 101 files changed, 11241 insertions(+), 9341 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/.travis.yml ---------------------------------------------------------------------- diff --git a/.travis.yml b/.travis.yml index 677209b..ce935b2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -68,7 +68,7 @@ matrix: dist: trusty addons: firefox: "31.0" - env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.2.0" HADOOP_VER="2.6" PROFILE="-Pspark-2.2 -Pweb-ci -Pscalding -Phelium-dev -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_PROJECTS="-Dtests.to.exclude=**/ZeppelinSparkClusterTest.java,**/org.apache.zeppelin.spark.*,**/HeliumApplicationFactoryTest.java -DfailIfNoTests=false" + env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.2.0" HADOOP_VER="2.6" PROFILE="-Pspark-2.2 -Pweb-ci -Pscalding -Phelium-dev -Pexamples -Pscala-2.11" BUILD_FLAG="package -Pbuild-distr -DskipRat" TEST_FLAG="verify -Pusing-packaged-distr -DskipRat" MODULES="-pl ${INTERPRETERS}" TEST_PROJECTS="-Dtests.to.exclude=**/ZeppelinSparkClusterTest.java,**/org/apache/zeppelin/spark/*,**/HeliumApplicationFactoryTest.java -DfailIfNoTests=false" # Test selenium with spark module for 1.6.3 - jdk: "oraclejdk8" @@ -82,43 +82,43 @@ matrix: dist: trusty env: PYTHON="3" SCALA_VER="2.10" PROFILE="-Pscalding" BUILD_FLAG="install -DskipTests -DskipRat -Pr" TEST_FLAG="test -DskipRat" MODULES="-pl $(echo .,zeppelin-interpreter,${INTERPRETERS} | sed 's/!//g')" TEST_PROJECTS="" - # Test spark module for 2.2.0 with scala 
2.11, livy + # Test spark module for 2.2.0 with scala 2.11 - jdk: "oraclejdk8" dist: trusty - env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.2.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.2 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false" + env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.2.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.2 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false" - # Test spark module for 2.1.0 with scala 2.11, livy + # Test spark module for 2.1.0 with scala 2.11 - jdk: "openjdk7" dist: trusty - env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.1.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.1 -Phadoop2 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false" + env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.1.0" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.1 -Phadoop2 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl 
.,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.livy.* -DfailIfNoTests=false" # Test spark module for 2.0.2 with scala 2.11 - jdk: "oraclejdk8" dist: trusty - env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.0.2" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.0 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false" + env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="2.0.2" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-2.0 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false" - # Test spark module for 1.6.3 with scala 2.10 + # Test spark module for 1.6.3 with scala 2.10 - jdk: "openjdk7" dist: trusty - env: PYTHON="3" SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop2 -Phadoop-2.6 -Pscala-2.10" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.spark.* -DfailIfNoTests=false" + env: PYTHON="3" SCALA_VER="2.10" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop2 
-Phadoop-2.6 -Pscala-2.10" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.spark.* -DfailIfNoTests=false" # Test spark module for 1.6.3 with scala 2.11 - jdk: "oraclejdk8" dist: trusty - env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false" + env: PYTHON="2" SCALA_VER="2.11" SPARK_VER="1.6.3" HADOOP_VER="2.6" PROFILE="-Pweb-ci -Pspark-1.6 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" SPARKR="true" BUILD_FLAG="install -DskipTests -DskipRat" TEST_FLAG="test -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python" TEST_PROJECTS="-Dtest=ZeppelinSparkClusterTest,org.apache.zeppelin.spark.* -DfailIfNoTests=false" # Test python/pyspark with python 2, livy 0.2 - sudo: required dist: trusty jdk: "openjdk7" - env: PYTHON="2" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.6" LIVY_VER="0.4.0-incubating" PROFILE="-Pspark-1.6 -Phadoop2 -Phadoop-2.6 -Pscala-2.10" BUILD_FLAG="install -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false" + env: 
PYTHON="2" SCALA_VER="2.10" SPARK_VER="1.6.1" HADOOP_VER="2.6" LIVY_VER="0.4.0-incubating" PROFILE="-Pspark-1.6 -Phadoop2 -Phadoop-2.6 -Plivy-0.2 -Pscala-2.10" BUILD_FLAG="install -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false" # Test python/pyspark with python 3, livy 0.3 - sudo: required dist: trusty jdk: "openjdk7" - env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.6" LIVY_VER="0.4.0-incubating" PROFILE="-Pspark-2.0 -Phadoop3 -Phadoop-2.6 -Pscala-2.11" BUILD_FLAG="install -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark-dependencies,spark,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false" - + env: PYTHON="3" SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.6" LIVY_VER="0.4.0-incubating" PROFILE="-Pspark-2.0 -Phadoop3 -Phadoop-2.6 -Pscala-2.11 -Plivy-0.3" BUILD_FLAG="install -am -DskipTests -DskipRat" TEST_FLAG="verify -DskipRat" MODULES="-pl .,zeppelin-interpreter,zeppelin-display,spark/interpreter,spark/scala-2.10,spark/scala-2.11,spark/spark-dependencies,python,livy" TEST_PROJECTS="-Dtest=LivySQLInterpreterTest,org.apache.zeppelin.spark.PySpark*Test,org.apache.zeppelin.python.* -Dpyspark.test.exclude='' -DfailIfNoTests=false" + before_install: # check files included in commit range, clear bower_components if a bower.json file has changed. 
# bower cache clearing can also be forced by putting "bower clear" or "clear bower" in a commit message @@ -133,7 +133,7 @@ before_install: - ls -la .spark-dist ${HOME}/.m2/repository/.cache/maven-download-plugin || true - ls .node_modules && cp -r .node_modules zeppelin-web/node_modules || echo "node_modules are not cached" - "/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1600x1024x16" - - ./dev/change_scala_version.sh $SCALA_VER + #- ./dev/change_scala_version.sh $SCALA_VER - source ~/.environ install: @@ -145,9 +145,11 @@ before_script: - if [[ -n $LIVY_VER ]]; then ./testing/downloadLivy.sh $LIVY_VER; fi - if [[ -n $LIVY_VER ]]; then export LIVY_HOME=`pwd`/livy-$LIVY_VER-bin; fi - if [[ -n $LIVY_VER ]]; then export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER; fi - - export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER - - echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh + - if [[ -n $SPARK_VER ]]; then export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER; fi + - if [[ -n $SPARK_VER ]]; then echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh; fi - echo "export ZEPPELIN_HELIUM_REGISTRY=helium" >> conf/zeppelin-env.sh + - echo "export SPARK_PRINT_LAUNCH_COMMAND=true" >> conf/zeppelin-env.sh + - export SPARK_PRINT_LAUNCH_COMMAND=true - tail conf/zeppelin-env.sh # https://docs.travis-ci.com/user/gui-and-headless-browsers/#Using-xvfb-to-Run-Tests-That-Require-a-GUI - if [[ -n $TEST_MODULES ]]; then export DISPLAY=:99.0; sh -e /etc/init.d/xvfb start; sleep 3; fi http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/bin/interpreter.sh ---------------------------------------------------------------------- diff --git a/bin/interpreter.sh b/bin/interpreter.sh index aa25646..45ee0ce 100755 --- a/bin/interpreter.sh +++ b/bin/interpreter.sh @@ -121,7 
+121,7 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then fi if [[ -n "${SPARK_HOME}" ]]; then export SPARK_SUBMIT="${SPARK_HOME}/bin/spark-submit" - SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/zeppelin-spark*.jar)" + SPARK_APP_JAR="$(ls ${ZEPPELIN_HOME}/interpreter/spark/spark-interpreter*.jar)" # This will evantually passes SPARK_APP_JAR to classpath of SparkIMain ZEPPELIN_INTP_CLASSPATH+=":${SPARK_APP_JAR}" http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/docs/interpreter/spark.md ---------------------------------------------------------------------- diff --git a/docs/interpreter/spark.md b/docs/interpreter/spark.md index da957c6..90b1608 100644 --- a/docs/interpreter/spark.md +++ b/docs/interpreter/spark.md @@ -199,6 +199,10 @@ Zeppelin support both yarn client and yarn cluster mode (yarn cluster mode is su You can either specify them in `zeppelin-env.sh`, or in interpreter setting page. Specifying them in `zeppelin-env.sh` means you can use only one version of `spark` & `hadoop`. Specifying them in interpreter setting page means you can use multiple versions of `spark` & `hadoop` in one zeppelin instance. +### 4. New Version of SparkInterpreter +Starting with Zeppelin 0.8.0, there is a new version of SparkInterpreter with better Spark support and code completion. By default, the old version of SparkInterpreter is still used. +To use the new one, set `zeppelin.spark.useNew` to `true` in the interpreter setting. + ## SparkContext, SQLContext, SparkSession, ZeppelinContext SparkContext, SQLContext and ZeppelinContext are automatically created and exposed as variable names `sc`, `sqlContext` and `z`, respectively, in Scala, Python and R environments. Staring from 0.6.1 SparkSession is available as variable `spark` when you are using Spark 2.x. 
http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 725db41..2c230cb 100644 --- a/pom.xml +++ b/pom.xml @@ -56,9 +56,11 @@ <module>zeppelin-interpreter</module> <module>zeppelin-zengine</module> <module>zeppelin-display</module> - <module>spark-dependencies</module> <module>groovy</module> - <module>spark</module> + <module>spark/scala-2.10</module> + <module>spark/scala-2.11</module> + <module>spark/interpreter</module> + <module>spark/spark-dependencies</module> <module>markdown</module> <module>angular</module> <module>shell</module> @@ -86,6 +88,7 @@ <properties> <!-- language versions --> + <java.version>1.7</java.version> <scala.version>2.10.5</scala.version> <scala.binary.version>2.10</scala.binary.version> <scalatest.version>2.2.4</scalatest.version> @@ -329,8 +332,8 @@ <artifactId>maven-compiler-plugin</artifactId> <version>${plugin.compiler.version}</version> <configuration> - <source>1.7</source> - <target>1.7</target> + <source>${java.version}</source> + <target>${java.version}</target> </configuration> </plugin> @@ -739,9 +742,6 @@ <profiles> <profile> <id>scala-2.10</id> - <activation> - <activeByDefault>true</activeByDefault> - </activation> <properties> <scala.version>2.10.5</scala.version> <scala.binary.version>2.10</scala.binary.version> @@ -750,8 +750,11 @@ <profile> <id>scala-2.11</id> + <activation> + <activeByDefault>true</activeByDefault> + </activation> <properties> - <scala.version>2.11.7</scala.version> + <scala.version>2.11.8</scala.version> <scala.binary.version>2.11</scala.binary.version> </properties> </profile> http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/python/pom.xml ---------------------------------------------------------------------- diff --git a/python/pom.xml b/python/pom.xml index 3ce47b0..c14d4b1 100644 --- a/python/pom.xml +++ b/python/pom.xml @@ -43,6 +43,7 @@ 
<pypi.repo.url>https://pypi.python.org/packages</pypi.repo.url> <python.py4j.repo.folder>/64/5c/01e13b68e8caafece40d549f232c9b5677ad1016071a48d04cc3895acaa3</python.py4j.repo.folder> <grpc.version>1.4.0</grpc.version> + <plugin.shade.version>2.4.1</plugin.shade.version> </properties> <dependencies> @@ -90,13 +91,7 @@ <artifactId>grpc-stub</artifactId> <version>${grpc.version}</version> </dependency> - - <dependency> - <groupId>com.google.guava</groupId> - <artifactId>guava</artifactId> - <version>18.0</version> - </dependency> - + <!-- test libraries --> <dependency> <groupId>junit</groupId> @@ -203,6 +198,38 @@ </plugin> <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>${plugin.shade.version}</version> + <configuration> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" /> + <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> + <resource>reference.conf</resource> + </transformer> + </transformers> + <relocations> + <relocation> + <pattern>com.google.common</pattern> + <shadedPattern>org.apache.zeppelin.com.google.common</shadedPattern> + </relocation> + <relocation> + <pattern>py4j</pattern> + <shadedPattern>org.apache.zeppelin.py4j</shadedPattern> + </relocation> + </relocations> + </configuration> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + </execution> + </executions> + </plugin> + + <plugin> <artifactId>maven-enforcer-plugin</artifactId> </plugin> <plugin> http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/python/src/main/java/org/apache/zeppelin/python/IPythonInterpreter.java ---------------------------------------------------------------------- diff --git a/python/src/main/java/org/apache/zeppelin/python/IPythonInterpreter.java b/python/src/main/java/org/apache/zeppelin/python/IPythonInterpreter.java index bd687be..81cfeb2 
100644 --- a/python/src/main/java/org/apache/zeppelin/python/IPythonInterpreter.java +++ b/python/src/main/java/org/apache/zeppelin/python/IPythonInterpreter.java @@ -299,7 +299,7 @@ public class IPythonInterpreter extends Interpreter implements ExecuteResultHand } @Override - public void close() { + public void close() throws InterpreterException { if (watchDog != null) { LOGGER.debug("Kill IPython Process"); ipythonClient.stop(StopRequest.newBuilder().build()); @@ -327,7 +327,7 @@ public class IPythonInterpreter extends Interpreter implements ExecuteResultHand } @Override - public void cancel(InterpreterContext context) { + public void cancel(InterpreterContext context) throws InterpreterException { ipythonClient.cancel(CancelRequest.newBuilder().build()); } @@ -337,7 +337,7 @@ public class IPythonInterpreter extends Interpreter implements ExecuteResultHand } @Override - public int getProgress(InterpreterContext context) { + public int getProgress(InterpreterContext context) throws InterpreterException { return 0; } http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java ---------------------------------------------------------------------- diff --git a/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java b/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java index b13cb8a..028f1c6 100644 --- a/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java +++ b/python/src/main/java/org/apache/zeppelin/python/PythonInterpreter.java @@ -285,7 +285,7 @@ public class PythonInterpreter extends Interpreter implements ExecuteResultHandl } @Override - public void close() { + public void close() throws InterpreterException { if (iPythonInterpreter != null) { iPythonInterpreter.close(); return; @@ -463,7 +463,7 @@ public class PythonInterpreter extends Interpreter implements ExecuteResultHandl return context; } - public void interrupt() throws 
IOException { + public void interrupt() throws IOException, InterpreterException { if (pythonPid > -1) { logger.info("Sending SIGINT signal to PID : " + pythonPid); Runtime.getRuntime().exec("kill -SIGINT " + pythonPid); @@ -474,7 +474,7 @@ public class PythonInterpreter extends Interpreter implements ExecuteResultHandl } @Override - public void cancel(InterpreterContext context) { + public void cancel(InterpreterContext context) throws InterpreterException { if (iPythonInterpreter != null) { iPythonInterpreter.cancel(context); } @@ -491,7 +491,7 @@ public class PythonInterpreter extends Interpreter implements ExecuteResultHandl } @Override - public int getProgress(InterpreterContext context) { + public int getProgress(InterpreterContext context) throws InterpreterException { if (iPythonInterpreter != null) { return iPythonInterpreter.getProgress(context); } http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/python/src/test/java/org/apache/zeppelin/python/IPythonInterpreterTest.java ---------------------------------------------------------------------- diff --git a/python/src/test/java/org/apache/zeppelin/python/IPythonInterpreterTest.java b/python/src/test/java/org/apache/zeppelin/python/IPythonInterpreterTest.java index d89ddac..cb854d6 100644 --- a/python/src/test/java/org/apache/zeppelin/python/IPythonInterpreterTest.java +++ b/python/src/test/java/org/apache/zeppelin/python/IPythonInterpreterTest.java @@ -66,7 +66,7 @@ public class IPythonInterpreterTest { } @After - public void close() { + public void close() throws InterpreterException { interpreter.close(); } @@ -81,6 +81,9 @@ public class IPythonInterpreterTest { InterpreterResult result = interpreter.interpret("from __future__ import print_function", getInterpreterContext()); assertEquals(InterpreterResult.Code.SUCCESS, result.code()); + result = interpreter.interpret("import sys\nprint(sys.version_info)", getInterpreterContext()); + assertEquals(InterpreterResult.Code.SUCCESS, result.code()); 
+ // single output without print InterpreterContext context = getInterpreterContext(); result = interpreter.interpret("'hello world'", context); @@ -195,6 +198,9 @@ public class IPythonInterpreterTest { context = getInterpreterContext(); completions = interpreter.completion("sys.std", 7, context); + for (InterpreterCompletion completion : completions) { + System.out.println(completion.getValue()); + } assertEquals(3, completions.size()); assertEquals("stderr", completions.get(0).getValue()); assertEquals("stdin", completions.get(1).getValue()); @@ -308,6 +314,7 @@ public class IPythonInterpreterTest { context = getInterpreterContext(); result = interpreter.interpret("from bokeh.io import output_notebook, show\n" + "from bokeh.plotting import figure\n" + + "import bkzep\n" + "output_notebook(notebook_type='zeppelin')", context); Thread.sleep(100); assertEquals(InterpreterResult.Code.SUCCESS, result.code()); @@ -329,10 +336,11 @@ public class IPythonInterpreterTest { Thread.sleep(100); assertEquals(InterpreterResult.Code.SUCCESS, result.code()); interpreterResultMessages = context.out.getInterpreterResultMessages(); - assertEquals(1, interpreterResultMessages.size()); + assertEquals(2, interpreterResultMessages.size()); assertEquals(InterpreterResult.Type.HTML, interpreterResultMessages.get(0).getType()); + assertEquals(InterpreterResult.Type.HTML, interpreterResultMessages.get(1).getType()); // docs_json is the source data of plotting which bokeh would use to render the plotting. 
- assertTrue(interpreterResultMessages.get(0).getData().contains("docs_json")); + assertTrue(interpreterResultMessages.get(1).getData().contains("docs_json")); // ggplot context = getInterpreterContext(); http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterMatplotlibTest.java ---------------------------------------------------------------------- diff --git a/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterMatplotlibTest.java b/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterMatplotlibTest.java index 8c088dc..1ab9cf1 100644 --- a/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterMatplotlibTest.java +++ b/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterMatplotlibTest.java @@ -80,7 +80,7 @@ public class PythonInterpreterMatplotlibTest implements InterpreterOutputListene } @After - public void afterTest() throws IOException { + public void afterTest() throws IOException, InterpreterException { python.close(); } http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterTest.java ---------------------------------------------------------------------- diff --git a/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterTest.java b/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterTest.java index 4f08d50..1143b9e 100644 --- a/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterTest.java +++ b/python/src/test/java/org/apache/zeppelin/python/PythonInterpreterTest.java @@ -93,7 +93,7 @@ public class PythonInterpreterTest implements InterpreterOutputListener { } @After - public void afterTest() throws IOException { + public void afterTest() throws IOException, InterpreterException { pythonInterpreter.close(); } http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/r/pom.xml 
---------------------------------------------------------------------- diff --git a/r/pom.xml b/r/pom.xml index 8c80b34..fef12e3 100644 --- a/r/pom.xml +++ b/r/pom.xml @@ -70,13 +70,6 @@ <dependency> <groupId>${project.groupId}</groupId> - <artifactId>zeppelin-spark-dependencies_${scala.binary.version}</artifactId> - <version>${project.version}</version> - <scope>provided</scope> - </dependency> - - <dependency> - <groupId>${project.groupId}</groupId> <artifactId>zeppelin-interpreter</artifactId> <version>${project.version}</version> <scope>provided</scope> http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/spark-dependencies/pom.xml ---------------------------------------------------------------------- diff --git a/spark-dependencies/pom.xml b/spark-dependencies/pom.xml deleted file mode 100644 index 15138cd..0000000 --- a/spark-dependencies/pom.xml +++ /dev/null @@ -1,1042 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - ~ Licensed to the Apache Software Foundation (ASF) under one or more - ~ contributor license agreements. See the NOTICE file distributed with - ~ this work for additional information regarding copyright ownership. - ~ The ASF licenses this file to You under the Apache License, Version 2.0 - ~ (the "License"); you may not use this file except in compliance with - ~ the License. You may obtain a copy of the License at - ~ - ~ http://www.apache.org/licenses/LICENSE-2.0 - ~ - ~ Unless required by applicable law or agreed to in writing, software - ~ distributed under the License is distributed on an "AS IS" BASIS, - ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ~ See the License for the specific language governing permissions and - ~ limitations under the License. 
- --> - -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <parent> - <artifactId>zeppelin</artifactId> - <groupId>org.apache.zeppelin</groupId> - <version>0.9.0-SNAPSHOT</version> - <relativePath>..</relativePath> - </parent> - - <groupId>org.apache.zeppelin</groupId> - <artifactId>zeppelin-spark-dependencies_2.10</artifactId> - <packaging>jar</packaging> - <version>0.9.0-SNAPSHOT</version> - <name>Zeppelin: Spark dependencies</name> - <description>Zeppelin spark support</description> - - <properties> - <!-- library version defined in this section brought from spark 1.4.1 and it's dependency. - Therefore changing only spark.version is not going to be enough when this module - support new version of spark to make the new version as default supported version. - - Each profile (spark-2.0, spark-1.6, etc) will overrides necessary dependency version. - So we'll make one of those profile 'activateByDefault' to make it default supported version - instead of changing spark.version in this section. 
- --> - - <spark.version>1.4.1</spark.version> - <hadoop.version>2.3.0</hadoop.version> - <yarn.version>${hadoop.version}</yarn.version> - <avro.version>1.7.7</avro.version> - <avro.mapred.classifier/> - <jets3t.version>0.7.1</jets3t.version> - <protobuf.version>2.4.1</protobuf.version> - - <akka.group>org.spark-project.akka</akka.group> - <akka.version>2.3.4-spark</akka.version> - - <spark.archive>spark-${spark.version}</spark.archive> - <spark.src.download.url> - http://d3kbcqa49mib13.cloudfront.net/${spark.archive}.tgz - </spark.src.download.url> - <spark.bin.download.url> - http://d3kbcqa49mib13.cloudfront.net/${spark.archive}-bin-without-hadoop.tgz - </spark.bin.download.url> - <spark.py4j.version>0.8.2.1</spark.py4j.version> - - <!--plugin versions--> - <plugin.shade.version>2.3</plugin.shade.version> - </properties> - - <dependencyManagement> - <dependencies> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - <version>${avro.version}</version> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-ipc</artifactId> - <version>${avro.version}</version> - <exclusions> - <exclusion> - <groupId>io.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>org.mortbay.jetty</groupId> - <artifactId>jetty</artifactId> - </exclusion> - <exclusion> - <groupId>org.mortbay.jetty</groupId> - <artifactId>jetty-util</artifactId> - </exclusion> - <exclusion> - <groupId>org.mortbay.jetty</groupId> - <artifactId>servlet-api</artifactId> - </exclusion> - <exclusion> - <groupId>org.apache.velocity</groupId> - <artifactId>velocity</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-mapred</artifactId> - <version>${avro.version}</version> - <classifier>${avro.mapred.classifier}</classifier> - <exclusions> - <exclusion> - <groupId>io.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - 
<exclusion> - <groupId>org.mortbay.jetty</groupId> - <artifactId>jetty</artifactId> - </exclusion> - <exclusion> - <groupId>org.mortbay.jetty</groupId> - <artifactId>jetty-util</artifactId> - </exclusion> - <exclusion> - <groupId>org.mortbay.jetty</groupId> - <artifactId>servlet-api</artifactId> - </exclusion> - <exclusion> - <groupId>org.apache.velocity</groupId> - <artifactId>velocity</artifactId> - </exclusion> - </exclusions> - </dependency> - - <!-- See SPARK-1556 for info on this dependency: --> - <dependency> - <groupId>net.java.dev.jets3t</groupId> - <artifactId>jets3t</artifactId> - <version>${jets3t.version}</version> - <scope>runtime</scope> - <exclusions> - <exclusion> - <groupId>commons-logging</groupId> - <artifactId>commons-logging</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - <version>${yarn.version}</version> - <exclusions> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.ow2.asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>commons-logging</groupId> - <artifactId>commons-logging</artifactId> - </exclusion> - </exclusions> - </dependency> - - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-common</artifactId> - <version>${yarn.version}</version> - <exclusions> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.ow2.asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>javax.servlet</groupId> - <artifactId>servlet-api</artifactId> - </exclusion> - <exclusion> - <groupId>commons-logging</groupId> - 
<artifactId>commons-logging</artifactId> - </exclusion> - </exclusions> - </dependency> - - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-server-web-proxy</artifactId> - <version>${yarn.version}</version> - <exclusions> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.ow2.asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>javax.servlet</groupId> - <artifactId>servlet-api</artifactId> - </exclusion> - <exclusion> - <groupId>commons-logging</groupId> - <artifactId>commons-logging</artifactId> - </exclusion> - </exclusions> - </dependency> - - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-client</artifactId> - <version>${yarn.version}</version> - <exclusions> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.ow2.asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>javax.servlet</groupId> - <artifactId>servlet-api</artifactId> - </exclusion> - <exclusion> - <groupId>commons-logging</groupId> - <artifactId>commons-logging</artifactId> - </exclusion> - </exclusions> - </dependency> - </dependencies> - </dependencyManagement> - - <dependencies> - <!-- Spark --> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-core_${scala.binary.version}</artifactId> - <version>${spark.version}</version> - <exclusions> - <exclusion> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - </exclusion> - </exclusions> - </dependency> - - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-repl_${scala.binary.version}</artifactId> - <version>${spark.version}</version> - 
</dependency> - - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-sql_${scala.binary.version}</artifactId> - <version>${spark.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-hive_${scala.binary.version}</artifactId> - <version>${spark.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-streaming_${scala.binary.version}</artifactId> - <version>${spark.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-catalyst_${scala.binary.version}</artifactId> - <version>${spark.version}</version> - </dependency> - - <!-- hadoop --> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <version>${hadoop.version}</version> - </dependency> - - - <dependency> - <groupId>com.google.protobuf</groupId> - <artifactId>protobuf-java</artifactId> - <version>${protobuf.version}</version> - </dependency> - - <dependency> - <groupId>${akka.group}</groupId> - <artifactId>akka-actor_${scala.binary.version}</artifactId> - <version>${akka.version}</version> - </dependency> - <dependency> - <groupId>${akka.group}</groupId> - <artifactId>akka-remote_${scala.binary.version}</artifactId> - <version>${akka.version}</version> - </dependency> - <dependency> - <groupId>${akka.group}</groupId> - <artifactId>akka-slf4j_${scala.binary.version}</artifactId> - <version>${akka.version}</version> - </dependency> - <dependency> - <groupId>${akka.group}</groupId> - <artifactId>akka-testkit_${scala.binary.version}</artifactId> - <version>${akka.version}</version> - </dependency> - <dependency> - <groupId>${akka.group}</groupId> - <artifactId>akka-zeromq_${scala.binary.version}</artifactId> - <version>${akka.version}</version> - <exclusions> - <exclusion> - <groupId>${akka.group}</groupId> - <artifactId>akka-actor_${scala.binary.version}</artifactId> - </exclusion> - 
</exclusions> - </dependency> - - <!-- yarn (not supported for Spark v1.5.0 or higher) --> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-yarn_${scala.binary.version}</artifactId> - <version>${spark.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - <version>${yarn.version}</version> - </dependency> - - </dependencies> - - <profiles> - <profile> - <id>spark-1.1</id> - <dependencies> - - </dependencies> - <properties> - <spark.version>1.1.1</spark.version> - <akka.version>2.2.3-shaded-protobuf</akka.version> - </properties> - </profile> - - <profile> - <id>cassandra-spark-1.1</id> - <dependencies> - <dependency> - <groupId>com.datastax.spark</groupId> - <artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId> - <version>1.1.1</version> - <exclusions> - <exclusion> - <groupId>org.joda</groupId> - <artifactId>joda-convert</artifactId> - </exclusion> - </exclusions> - </dependency> - </dependencies> - <properties> - <spark.version>1.1.1</spark.version> - <akka.version>2.2.3-shaded-protobuf</akka.version> - </properties> - </profile> - - <profile> - <id>spark-1.2</id> - <dependencies> - </dependencies> - <properties> - <spark.version>1.2.1</spark.version> - </properties> - </profile> - - <profile> - <id>cassandra-spark-1.2</id> - <properties> - <spark.version>1.2.1</spark.version> - </properties> - <dependencies> - <dependency> - <groupId>com.datastax.spark</groupId> - <artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId> - <version>1.2.1</version> - <exclusions> - <exclusion> - <groupId>org.joda</groupId> - <artifactId>joda-convert</artifactId> - </exclusion> - </exclusions> - </dependency> - </dependencies> - </profile> - - <profile> - <id>spark-1.3</id> - - <properties> - <spark.version>1.3.1</spark.version> - </properties> - - <dependencies> - </dependencies> - - </profile> - - <profile> - <id>cassandra-spark-1.3</id> 
- <properties> - <spark.version>1.3.0</spark.version> - </properties> - - <dependencies> - <dependency> - <groupId>com.datastax.spark</groupId> - <artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId> - <version>1.3.1</version> - <exclusions> - <exclusion> - <groupId>org.joda</groupId> - <artifactId>joda-convert</artifactId> - </exclusion> - </exclusions> - </dependency> - </dependencies> - </profile> - - <profile> - <id>spark-1.4</id> - <properties> - <spark.version>1.4.1</spark.version> - </properties> - - <dependencies> - </dependencies> - </profile> - - <profile> - <id>cassandra-spark-1.4</id> - <properties> - <spark.version>1.4.1</spark.version> - </properties> - - <dependencies> - <dependency> - <groupId>com.datastax.spark</groupId> - <artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId> - <version>1.4.0</version> - <exclusions> - <exclusion> - <groupId>org.joda</groupId> - <artifactId>joda-convert</artifactId> - </exclusion> - </exclusions> - </dependency> - </dependencies> - </profile> - - <profile> - <id>spark-1.5</id> - <properties> - <spark.version>1.5.2</spark.version> - <akka.group>com.typesafe.akka</akka.group> - <akka.version>2.3.11</akka.version> - <protobuf.version>2.5.0</protobuf.version> - </properties> - - <dependencies> - </dependencies> - </profile> - - <profile> - <id>cassandra-spark-1.5</id> - <properties> - <spark.version>1.5.1</spark.version> - <akka.group>com.typesafe.akka</akka.group> - <akka.version>2.3.11</akka.version> - <protobuf.version>2.5.0</protobuf.version> - <guava.version>16.0.1</guava.version> - </properties> - - <dependencies> - <dependency> - <groupId>com.datastax.spark</groupId> - <artifactId>spark-cassandra-connector_${scala.binary.version}</artifactId> - <version>1.5.0</version> - <exclusions> - <exclusion> - <groupId>org.joda</groupId> - <artifactId>joda-convert</artifactId> - </exclusion> - </exclusions> - </dependency> - </dependencies> - </profile> - - <profile> - 
<id>spark-1.6</id> - <properties> - <spark.version>1.6.3</spark.version> - <spark.py4j.version>0.9</spark.py4j.version> - <akka.group>com.typesafe.akka</akka.group> - <akka.version>2.3.11</akka.version> - <protobuf.version>2.5.0</protobuf.version> - </properties> - </profile> - - <profile> - <id>spark-2.0</id> - <properties> - <spark.version>2.0.2</spark.version> - <protobuf.version>2.5.0</protobuf.version> - <spark.py4j.version>0.10.3</spark.py4j.version> - </properties> - </profile> - - <profile> - <id>spark-2.1</id> - <properties> - <spark.version>2.1.0</spark.version> - <protobuf.version>2.5.0</protobuf.version> - <spark.py4j.version>0.10.4</spark.py4j.version> - <scala.version>2.11.8</scala.version> - </properties> - </profile> - - <profile> - <id>spark-2.2</id> - <activation> - <activeByDefault>true</activeByDefault> - </activation> - <properties> - <spark.version>2.2.0</spark.version> - <protobuf.version>2.5.0</protobuf.version> - <spark.py4j.version>0.10.4</spark.py4j.version> - </properties> - </profile> - - <profile> - <id>hadoop-0.23</id> - <!-- SPARK-1121: Adds an explicit dependency on Avro to work around a - Hadoop 0.23.X issue --> - <dependencies> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - </dependency> - </dependencies> - <properties> - <hadoop.version>0.23.10</hadoop.version> - </properties> - </profile> - - <profile> - <id>hadoop-1</id> - <properties> - <hadoop.version>1.0.4</hadoop.version> - <avro.mapred.classifier>hadoop1</avro.mapred.classifier> - <codehaus.jackson.version>1.8.8</codehaus.jackson.version> - <akka.group>org.spark-project.akka</akka.group> - </properties> - </profile> - - <profile> - <id>hadoop-2.2</id> - <properties> - <hadoop.version>2.2.0</hadoop.version> - <protobuf.version>2.5.0</protobuf.version> - <avro.mapred.classifier>hadoop2</avro.mapred.classifier> - </properties> - </profile> - - <profile> - <id>hadoop-2.3</id> - <properties> - <hadoop.version>2.3.0</hadoop.version> - 
<protobuf.version>2.5.0</protobuf.version> - <jets3t.version>0.9.3</jets3t.version> - <avro.mapred.classifier>hadoop2</avro.mapred.classifier> - </properties> - </profile> - - <profile> - <id>hadoop-2.4</id> - <properties> - <hadoop.version>2.4.0</hadoop.version> - <protobuf.version>2.5.0</protobuf.version> - <jets3t.version>0.9.3</jets3t.version> - <avro.mapred.classifier>hadoop2</avro.mapred.classifier> - </properties> - </profile> - - <profile> - <id>hadoop-2.6</id> - <properties> - <hadoop.version>2.6.0</hadoop.version> - <protobuf.version>2.5.0</protobuf.version> - <jets3t.version>0.9.3</jets3t.version> - <avro.mapred.classifier>hadoop2</avro.mapred.classifier> - </properties> - </profile> - - <profile> - <id>hadoop-2.7</id> - <properties> - <hadoop.version>2.7.2</hadoop.version> - <protobuf.version>2.5.0</protobuf.version> - <jets3t.version>0.9.0</jets3t.version> - <avro.mapred.classifier>hadoop2</avro.mapred.classifier> - </properties> - </profile> - - <profile> - <id>mapr3</id> - <activation> - <activeByDefault>false</activeByDefault> - </activation> - <properties> - <hadoop.version>1.0.3-mapr-3.0.3</hadoop.version> - <yarn.version>2.3.0-mapr-4.0.0-FCS</yarn.version> - <jets3t.version>0.7.1</jets3t.version> - </properties> - <repositories> - <repository> - <id>mapr-releases</id> - <url>http://repository.mapr.com/maven/</url> - <snapshots> - <enabled>false</enabled> - </snapshots> - <releases> - <enabled>true</enabled> - </releases> - </repository> - </repositories> - </profile> - - <profile> - <id>mapr40</id> - <activation> - <activeByDefault>false</activeByDefault> - </activation> - <properties> - <hadoop.version>2.4.1-mapr-1503</hadoop.version> - <yarn.version>2.4.1-mapr-1503</yarn.version> - <jets3t.version>0.9.3</jets3t.version> - </properties> - <dependencies> - <dependency> - <groupId>org.apache.curator</groupId> - <artifactId>curator-recipes</artifactId> - <version>2.4.0</version> - <exclusions> - <exclusion> - <groupId>org.apache.zookeeper</groupId> 
- <artifactId>zookeeper</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.zookeeper</groupId> - <artifactId>zookeeper</artifactId> - <version>3.4.5-mapr-1503</version> - </dependency> - </dependencies> - <repositories> - <repository> - <id>mapr-releases</id> - <url>http://repository.mapr.com/maven/</url> - <snapshots> - <enabled>false</enabled> - </snapshots> - <releases> - <enabled>true</enabled> - </releases> - </repository> - </repositories> - </profile> - - <profile> - <id>mapr41</id> - <activation> - <activeByDefault>false</activeByDefault> - </activation> - <properties> - <hadoop.version>2.5.1-mapr-1503</hadoop.version> - <yarn.version>2.5.1-mapr-1503</yarn.version> - <jets3t.version>0.7.1</jets3t.version> - </properties> - <dependencies> - <dependency> - <groupId>org.apache.curator</groupId> - <artifactId>curator-recipes</artifactId> - <version>2.4.0</version> - <exclusions> - <exclusion> - <groupId>org.apache.zookeeper</groupId> - <artifactId>zookeeper</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.zookeeper</groupId> - <artifactId>zookeeper</artifactId> - <version>3.4.5-mapr-1503</version> - </dependency> - </dependencies> - <repositories> - <repository> - <id>mapr-releases</id> - <url>http://repository.mapr.com/maven/</url> - <snapshots> - <enabled>false</enabled> - </snapshots> - <releases> - <enabled>true</enabled> - </releases> - </repository> - </repositories> - </profile> - - <profile> - <id>mapr50</id> - <activation> - <activeByDefault>false</activeByDefault> - </activation> - <properties> - <hadoop.version>2.7.0-mapr-1506</hadoop.version> - <yarn.version>2.7.0-mapr-1506</yarn.version> - <jets3t.version>0.9.3</jets3t.version> - </properties> - <dependencies> - <dependency> - <groupId>org.apache.curator</groupId> - <artifactId>curator-recipes</artifactId> - <version>2.4.0</version> - <exclusions> - <exclusion> - <groupId>org.apache.zookeeper</groupId> - 
<artifactId>zookeeper</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.zookeeper</groupId> - <artifactId>zookeeper</artifactId> - <version>3.4.5-mapr-1503</version> - </dependency> - </dependencies> - <repositories> - <repository> - <id>mapr-releases</id> - <url>http://repository.mapr.com/maven/</url> - <snapshots> - <enabled>false</enabled> - </snapshots> - <releases> - <enabled>true</enabled> - </releases> - </repository> - </repositories> - </profile> - - <profile> - <id>mapr51</id> - <activation> - <activeByDefault>false</activeByDefault> - </activation> - <properties> - <hadoop.version>2.7.0-mapr-1602</hadoop.version> - <yarn.version>2.7.0-mapr-1602</yarn.version> - <jets3t.version>0.9.3</jets3t.version> - </properties> - <dependencies> - <dependency> - <groupId>org.apache.curator</groupId> - <artifactId>curator-recipes</artifactId> - <version>2.4.0</version> - <exclusions> - <exclusion> - <groupId>org.apache.zookeeper</groupId> - <artifactId>zookeeper</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.zookeeper</groupId> - <artifactId>zookeeper</artifactId> - <version>3.4.5-mapr-1503</version> - </dependency> - </dependencies> - <repositories> - <repository> - <id>mapr-releases</id> - <url>http://repository.mapr.com/maven/</url> - <snapshots> - <enabled>false</enabled> - </snapshots> - <releases> - <enabled>true</enabled> - </releases> - </repository> - </repositories> - </profile> - - </profiles> - - <build> - <plugins> - <plugin> - <artifactId>maven-enforcer-plugin</artifactId> - <executions> - <execution> - <id>enforce</id> - <phase>none</phase> - </execution> - </executions> - </plugin> - - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-surefire-plugin</artifactId> - <configuration> - <forkCount>1</forkCount> - <reuseForks>false</reuseForks> - <argLine>-Xmx1024m -XX:MaxPermSize=256m</argLine> - </configuration> - </plugin> - - <plugin> 
- <groupId>com.googlecode.maven-download-plugin</groupId> - <artifactId>download-maven-plugin</artifactId> - <version>${plugin.download.version}</version> - </plugin> - - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-shade-plugin</artifactId> - <version>${plugin.shade.version}</version> - <configuration> - <filters> - <filter> - <artifact>*:*</artifact> - <excludes> - <exclude>org/datanucleus/**</exclude> - <exclude>META-INF/*.SF</exclude> - <exclude>META-INF/*.DSA</exclude> - <exclude>META-INF/*.RSA</exclude> - </excludes> - </filter> - </filters> - <transformers> - <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/> - <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> - <resource>reference.conf</resource> - </transformer> - </transformers> - </configuration> - <executions> - <execution> - <phase>package</phase> - <goals> - <goal>shade</goal> - </goals> - </execution> - </executions> - </plugin> - - <!-- Deploy datanucleus jars to the interpreter/spark directory --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-dependency-plugin</artifactId> - <executions> - <execution> - <id>copy-dependencies</id> - <phase>package</phase> - <goals> - <goal>copy-dependencies</goal> - </goals> - <configuration> - <outputDirectory>${project.build.directory}/../../interpreter/spark/dep</outputDirectory> - <overWriteReleases>false</overWriteReleases> - <overWriteSnapshots>false</overWriteSnapshots> - <overWriteIfNewer>true</overWriteIfNewer> - <includeGroupIds>org.datanucleus</includeGroupIds> - </configuration> - </execution> - <execution> - <phase>package</phase> - <goals> - <goal>copy</goal> - </goals> - <configuration> - <outputDirectory>${project.build.directory}/../../interpreter/spark/dep</outputDirectory> - <overWriteReleases>false</overWriteReleases> - <overWriteSnapshots>false</overWriteSnapshots> - 
<overWriteIfNewer>true</overWriteIfNewer> - <artifactItems> - <artifactItem> - <groupId>${project.groupId}</groupId> - <artifactId>${project.artifactId}</artifactId> - <version>${project.version}</version> - <type>${project.packaging}</type> - </artifactItem> - </artifactItems> - </configuration> - </execution> - </executions> - </plugin> - - <!-- include pyspark by default --> - <plugin> - <groupId>com.googlecode.maven-download-plugin</groupId> - <artifactId>download-maven-plugin</artifactId> - <executions> - <execution> - <id>download-pyspark-files</id> - <phase>validate</phase> - <goals> - <goal>wget</goal> - </goals> - <configuration> - <readTimeOut>60000</readTimeOut> - <retries>5</retries> - <unpack>true</unpack> - <url>${spark.src.download.url}</url> - <outputDirectory>${project.build.directory}</outputDirectory> - </configuration> - </execution> - </executions> - </plugin> - - <plugin> - <artifactId>maven-clean-plugin</artifactId> - <configuration> - <filesets> - <fileset> - <directory>${basedir}/../python/build</directory> - </fileset> - </filesets> - </configuration> - </plugin> - - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-antrun-plugin</artifactId> - <executions> - <execution> - <id>zip-pyspark-files</id> - <phase>generate-resources</phase> - <goals> - <goal>run</goal> - </goals> - <configuration> - <target> - <delete dir="../interpreter/spark/pyspark"/> - <copy todir="../interpreter/spark/pyspark" - file="${project.build.directory}/${spark.archive}/python/lib/py4j-${spark.py4j.version}-src.zip"/> - <zip destfile="${project.build.directory}/../../interpreter/spark/pyspark/pyspark.zip" - basedir="${project.build.directory}/${spark.archive}/python" - includes="pyspark/*.py,pyspark/**/*.py"/> - </target> - </configuration> - </execution> - </executions> - </plugin> - - <!-- include sparkr by default --> - <plugin> - <groupId>com.googlecode.maven-download-plugin</groupId> - <artifactId>download-maven-plugin</artifactId> - 
<executions> - <execution> - <id>download-sparkr-files</id> - <phase>validate</phase> - <goals> - <goal>wget</goal> - </goals> - <configuration> - <readTimeOut>60000</readTimeOut> - <retries>5</retries> - <url>${spark.bin.download.url}</url> - <unpack>true</unpack> - <outputDirectory>${project.build.directory}</outputDirectory> - </configuration> - </execution> - </executions> - </plugin> - <plugin> - <artifactId>maven-resources-plugin</artifactId> - <version>2.7</version> - <executions> - <execution> - <id>copy-sparkr-files</id> - <phase>generate-resources</phase> - <goals> - <goal>copy-resources</goal> - </goals> - <configuration> - <outputDirectory>${project.build.directory}/../../interpreter/spark/R/lib</outputDirectory> - <resources> - <resource> - <directory> - ${project.build.directory}/spark-${spark.version}-bin-without-hadoop/R/lib - </directory> - </resource> - </resources> - </configuration> - </execution> - </executions> - </plugin> - </plugins> - </build> -</project> http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/spark/interpreter/figure/null-1.png ---------------------------------------------------------------------- diff --git a/spark/interpreter/figure/null-1.png b/spark/interpreter/figure/null-1.png new file mode 100644 index 0000000..8b1ce07 Binary files /dev/null and b/spark/interpreter/figure/null-1.png differ http://git-wip-us.apache.org/repos/asf/zeppelin/blob/d762b528/spark/interpreter/pom.xml ---------------------------------------------------------------------- diff --git a/spark/interpreter/pom.xml b/spark/interpreter/pom.xml new file mode 100644 index 0000000..4496462 --- /dev/null +++ b/spark/interpreter/pom.xml @@ -0,0 +1,573 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license agreements. See the NOTICE file distributed with + ~ this work for additional information regarding copyright ownership. 
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0 + ~ (the "License"); you may not use this file except in compliance with + ~ the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, software + ~ distributed under the License is distributed on an "AS IS" BASIS, + ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ~ See the License for the specific language governing permissions and + ~ limitations under the License. + --> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <artifactId>spark-parent</artifactId> + <groupId>org.apache.zeppelin</groupId> + <version>0.9.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <groupId>org.apache.zeppelin</groupId> + <artifactId>spark-interpreter</artifactId> + <packaging>jar</packaging> + <version>0.9.0-SNAPSHOT</version> + <name>Zeppelin: Spark Interpreter</name> + <description>Zeppelin spark support</description> + + <properties> + <interpreter.name>spark</interpreter.name> + <!--library versions--> + <jsoup.version>1.8.2</jsoup.version> + <commons.exec.version>1.3</commons.exec.version> + <commons.compress.version>1.9</commons.compress.version> + <maven.plugin.api.version>3.0</maven.plugin.api.version> + <aether.version>1.12</aether.version> + <maven.aeither.provider.version>3.0.3</maven.aeither.provider.version> + <wagon.version>1.0</wagon.version> + + <datanucleus.rdbms.version>3.2.9</datanucleus.rdbms.version> + <datanucleus.apijdo.version>3.2.6</datanucleus.apijdo.version> + <datanucleus.core.version>3.2.10</datanucleus.core.version> + + <scala.compile.version>${scala.version}</scala.compile.version> + <!-- settings --> + 
<pyspark.test.exclude>**/PySparkInterpreterMatplotlibTest.java</pyspark.test.exclude>
+    <pyspark.test.include>**/*Test.*</pyspark.test.include>
+
+    <!-- Spark source/binary archives. The source archive is downloaded at build time
+         (see download-maven-plugin below) to extract the pyspark and py4j sources.
+         Fetched over HTTPS from the Apache release archive instead of a plain-HTTP mirror,
+         because the downloaded content is unpacked and shipped with the interpreter. -->
+    <spark.archive>spark-${spark.version}</spark.archive>
+    <spark.src.download.url>
+      https://archive.apache.org/dist/spark/${spark.archive}/${spark.archive}.tgz
+    </spark.src.download.url>
+    <spark.bin.download.url>
+      https://archive.apache.org/dist/spark/${spark.archive}/${spark.archive}-bin-without-hadoop.tgz
+    </spark.bin.download.url>
+
+  </properties>
+
+  <dependencies>
+    <!-- Zeppelin modules -->
+    <dependency>
+      <groupId>org.apache.zeppelin</groupId>
+      <artifactId>zeppelin-display</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <!-- Both Scala-version specific modules are declared as dependencies of this module. -->
+    <dependency>
+      <groupId>org.apache.zeppelin</groupId>
+      <artifactId>spark-scala-2.11</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.zeppelin</groupId>
+      <artifactId>spark-scala-2.10</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.zeppelin</groupId>
+      <artifactId>zeppelin-interpreter</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
+    <!-- py4j is excluded from zeppelin-python (main and test-jar). -->
+    <dependency>
+      <groupId>org.apache.zeppelin</groupId>
+      <artifactId>zeppelin-python</artifactId>
+      <version>${project.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>net.sf.py4j</groupId>
+          <artifactId>py4j</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>zeppelin-python</artifactId>
+      <version>${project.version}</version>
+      <classifier>tests</classifier>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>net.sf.py4j</groupId>
+          <artifactId>py4j</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <!-- Spark artifacts are 'provided': they are not packaged with this module. -->
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-repl_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-hive_${scala.binary.version}</artifactId>
+      <version>${spark.version}</version>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>com.fasterxml.jackson.core</groupId>
+          <artifactId>jackson-databind</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.fasterxml.jackson.core</groupId>
+          <artifactId>jackson-annotations</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <!-- Aether :: maven dependency resolution -->
+    <dependency>
+      <groupId>org.apache.maven</groupId>
+      <artifactId>maven-plugin-api</artifactId>
+      <version>${maven.plugin.api.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.codehaus.plexus</groupId>
+          <artifactId>plexus-utils</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.sonatype.sisu</groupId>
+          <artifactId>sisu-inject-plexus</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.maven</groupId>
+          <artifactId>maven-model</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.sonatype.aether</groupId>
+      <artifactId>aether-api</artifactId>
+      <version>${aether.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.sonatype.aether</groupId>
+      <artifactId>aether-util</artifactId>
+      <version>${aether.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.sonatype.aether</groupId>
+      <artifactId>aether-impl</artifactId>
+      <version>${aether.version}</version>
+    </dependency>
+
+    <!-- The aether-* artifacts are declared explicitly above, so the copies pulled in
+         transitively by maven-aether-provider are excluded here.
+         NOTE: the property name 'maven.aeither.provider.version' is spelled this way on purpose
+         here; it must match the name under which the property is defined elsewhere. -->
+    <dependency>
+      <groupId>org.apache.maven</groupId>
+      <artifactId>maven-aether-provider</artifactId>
+      <version>${maven.aeither.provider.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.sonatype.aether</groupId>
+          <artifactId>aether-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.sonatype.aether</groupId>
+          <artifactId>aether-spi</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.sonatype.aether</groupId>
+          <artifactId>aether-util</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.sonatype.aether</groupId>
+          <artifactId>aether-impl</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.plexus</groupId>
+          <artifactId>plexus-utils</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.sonatype.aether</groupId>
+      <artifactId>aether-connector-file</artifactId>
+      <version>${aether.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.sonatype.aether</groupId>
+      <artifactId>aether-connector-wagon</artifactId>
+      <version>${aether.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.maven.wagon</groupId>
+          <artifactId>wagon-provider-api</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.maven.wagon</groupId>
+      <artifactId>wagon-provider-api</artifactId>
+      <version>${wagon.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.codehaus.plexus</groupId>
+          <artifactId>plexus-utils</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.maven.wagon</groupId>
+      <artifactId>wagon-http-lightweight</artifactId>
+      <version>${wagon.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.maven.wagon</groupId>
+          <artifactId>wagon-http-shared</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.maven.wagon</groupId>
+      <artifactId>wagon-http</artifactId>
+      <version>${wagon.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-exec</artifactId>
+      <version>${commons.exec.version}</version>
+    </dependency>
+
+    <!-- Scala runtime/compiler: 'provided', like the Spark artifacts above. -->
+    <dependency>
+      <groupId>org.scala-lang</groupId>
+      <artifactId>scala-library</artifactId>
+      <version>${scala.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.scala-lang</groupId>
+      <artifactId>scala-compiler</artifactId>
+      <version>${scala.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.scala-lang</groupId>
+      <artifactId>scala-reflect</artifactId>
+      <version>${scala.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <!-- No version here: it has to come from inherited dependency management. -->
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-compress</artifactId>
+      <version>${commons.compress.version}</version>
+      <scope>provided</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.jsoup</groupId>
+      <artifactId>jsoup</artifactId>
+      <version>${jsoup.version}</version>
+    </dependency>
+
+    <!--test libraries-->
+    <dependency>
+      <groupId>org.scalatest</groupId>
+      <artifactId>scalatest_${scala.binary.version}</artifactId>
+      <version>${scalatest.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <!-- DataNucleus is needed on the test classpath only; its classes are also filtered
+         out of the shaded jar (see maven-shade-plugin filters below). -->
+    <dependency>
+      <groupId>org.datanucleus</groupId>
+      <artifactId>datanucleus-core</artifactId>
+      <version>${datanucleus.core.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.datanucleus</groupId>
+      <artifactId>datanucleus-api-jdo</artifactId>
+      <version>${datanucleus.apijdo.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.datanucleus</groupId>
+      <artifactId>datanucleus-rdbms</artifactId>
+      <version>${datanucleus.rdbms.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.powermock</groupId>
+      <artifactId>powermock-api-mockito</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.powermock</groupId>
+      <artifactId>powermock-module-junit4</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+
+  <build>
+    <plugins>
+      <!-- The 'enforce' execution is unbound (phase 'none'), i.e. disabled for this module. -->
+      <plugin>
+        <artifactId>maven-enforcer-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>enforce</id>
+            <phase>none</phase>
+          </execution>
+        </executions>
+
+        <configuration>
+          <rules>
+            <requireJavaVersion>
+              <version>1.7</version>
+            </requireJavaVersion>
+          </rules>
+        </configuration>
+      </plugin>
+
+      <!-- Step 1 of the pyspark packaging: download and unpack the Spark source archive
+           into the build directory. -->
+      <plugin>
+        <groupId>com.googlecode.maven-download-plugin</groupId>
+        <artifactId>download-maven-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>download-pyspark-files</id>
+            <phase>validate</phase>
+            <goals>
+              <goal>wget</goal>
+            </goals>
+            <configuration>
+              <readTimeOut>60000</readTimeOut>
+              <retries>5</retries>
+              <unpack>true</unpack>
+              <url>${spark.src.download.url}</url>
+              <outputDirectory>${project.build.directory}</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <!-- Step 2 of the pyspark packaging: recreate interpreter/spark/pyspark with the py4j
+           source zip and a pyspark.zip built from the unpacked Spark archive.
+           All three tasks address the output directory through the same expression. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-antrun-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>zip-pyspark-files</id>
+            <phase>generate-resources</phase>
+            <goals>
+              <goal>run</goal>
+            </goals>
+            <configuration>
+              <target>
+                <delete dir="${project.build.directory}/../../../interpreter/spark/pyspark" />
+                <copy file="${project.build.directory}/${spark.archive}/python/lib/py4j-${py4j.version}-src.zip" todir="${project.build.directory}/../../../interpreter/spark/pyspark" />
+                <zip basedir="${project.build.directory}/${spark.archive}/python" destfile="${project.build.directory}/../../../interpreter/spark/pyspark/pyspark.zip" includes="pyspark/*.py,pyspark/**/*.py" />
+              </target>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.scalatest</groupId>
+        <artifactId>scalatest-maven-plugin</artifactId>
+      </plugin>
+
+      <!-- Unit tests run in a fresh forked JVM per test class (forkCount 1, no fork reuse).
+           PYTHONPATH points at the pyspark/py4j zips produced by the antrun step above.
+           NOTE(review): -XX:MaxPermSize only has an effect on Java 7; PermGen was removed
+           in Java 8. Confirm whether the flag is still wanted. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <forkCount>1</forkCount>
+          <reuseForks>false</reuseForks>
+          <argLine>-Xmx1024m -XX:MaxPermSize=256m</argLine>
+          <excludes>
+            <exclude>**/SparkRInterpreterTest.java</exclude>
+            <exclude>${pyspark.test.exclude}</exclude>
+            <exclude>${tests.to.exclude}</exclude>
+          </excludes>
+          <environmentVariables>
+            <PYTHONPATH>${project.build.directory}/../../../interpreter/spark/pyspark/pyspark.zip:${project.build.directory}/../../../interpreter/lib/python/:${project.build.directory}/../../../interpreter/spark/pyspark/py4j-${py4j.version}-src.zip:.</PYTHONPATH>
+            <ZEPPELIN_HOME>${basedir}/../../</ZEPPELIN_HOME>
+          </environmentVariables>
+        </configuration>
+      </plugin>
+
+      <!-- Builds the shaded interpreter jar at 'package'. Signature files and DataNucleus
+           classes are filtered out; netty, com.google and py4j packages are relocated under
+           org.apache.zeppelin. -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>${plugin.shade.version}</version>
+        <configuration>
+          <!--<createDependencyReducedPom>false</createDependencyReducedPom>-->
+          <filters>
+            <filter>
+              <artifact>*:*</artifact>
+              <excludes>
+                <exclude>org/datanucleus/**</exclude>
+                <exclude>META-INF/*.SF</exclude>
+                <exclude>META-INF/*.DSA</exclude>
+                <exclude>META-INF/*.RSA</exclude>
+              </excludes>
+            </filter>
+          </filters>
+          <transformers>
+            <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
+            <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
+              <resource>reference.conf</resource>
+            </transformer>
+          </transformers>
+          <relocations>
+            <relocation>
+              <pattern>io.netty</pattern>
+              <shadedPattern>org.apache.zeppelin.io.netty</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>com.google</pattern>
+              <shadedPattern>org.apache.zeppelin.com.google</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>py4j.</pattern>
+              <shadedPattern>org.apache.zeppelin.py4j.</shadedPattern>
+            </relocation>
+          </relocations>
+        </configuration>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+
+      <!-- The executions 'copy-dependencies', 'copy-interpreter-dependencies' and
+           'copy-artifact' are switched off (phase 'none', skip=true); only this module's own
+           artifact is copied into interpreter/spark at 'package'. -->
+      <plugin>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>copy-dependencies</id>
+            <phase>none</phase>
+            <configuration>
+              <skip>true</skip>
+            </configuration>
+          </execution>
+
+          <execution>
+            <id>copy-interpreter-dependencies</id>
+            <phase>none</phase>
+            <configuration>
+              <skip>true</skip>
+            </configuration>
+          </execution>
+          <execution>
+            <id>copy-artifact</id>
+            <phase>none</phase>
+            <configuration>
+              <skip>true</skip>
+            </configuration>
+          </execution>
+
+
+          <execution>
+            <id>copy-spark-interpreter</id>
+            <phase>package</phase>
+            <goals>
+              <goal>copy</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${project.build.directory}/../../../interpreter/spark</outputDirectory>
+              <overWriteReleases>false</overWriteReleases>
+              <overWriteSnapshots>false</overWriteSnapshots>
+              <overWriteIfNewer>true</overWriteIfNewer>
+              <artifactItems>
+                <artifactItem>
+                  <groupId>${project.groupId}</groupId>
+                  <artifactId>${project.artifactId}</artifactId>
+                  <version>${project.version}</version>
+                  <type>${project.packaging}</type>
+                </artifactItem>
+              </artifactItems>
+            </configuration>
+          </execution>
+
+        </executions>
+      </plugin>
+
+      <!-- Copies this module's resources into interpreter/${interpreter.name} at 'package'. -->
+      <plugin>
+        <artifactId>maven-resources-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>copy-interpreter-setting</id>
+            <phase>package</phase>
+            <goals>
+              <goal>resources</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${project.build.directory}/../../../interpreter/${interpreter.name}</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+    </plugins>
+  </build>
+
+</project>