This is an automated email from the ASF dual-hosted git repository.

zjffdu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push:
     new 9aaea3e  [ZEPPELIN-5438] Add Build profile for spark-3.1
9aaea3e is described below

commit 9aaea3ee01ab6f05e6a3202ab433c5d477edef5f
Author: Jeff Zhang <zjf...@apache.org>
AuthorDate: Fri Jul 2 13:46:55 2021 +0800

    [ZEPPELIN-5438] Add Build profile for spark-3.1
    
    ### What is this PR for?
    * Add a Maven build profile for Spark 3.1.
    * Use Spark 3.1 and Scala 2.12 as the default build profile.
    * Some refactoring of the Spark test code.
    
    ### What type of PR is it?
    [Improvement]
    
    ### Todos
    * [ ] - Task
    
    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-5438
    
    ### How should this be tested?
    * CI pass
    
    ### Screenshots (if appropriate)
    
    ### Questions:
    * Do the license files need updating? No
    * Are there breaking changes for older versions? No
    * Does this need documentation? No
    
    Author: Jeff Zhang <zjf...@apache.org>
    
    Closes #4182 from zjffdu/ZEPPELIN-5438 and squashes the following commits:
    
    be71d5fdc3 [Jeff Zhang] [ZEPPELIN-5438] Add build profile for spark-3.1
---
 .github/workflows/core.yml                         | 17 +++++-----
 spark/interpreter/pom.xml                          |  4 ---
 .../zeppelin/spark/SparkInterpreterTest.java       |  7 ++--
 .../zeppelin/spark/SparkSqlInterpreterTest.java    | 25 +++++++++------
 spark/pom.xml                                      | 37 +++++++++++++++------
 .../zeppelin/integration/SparkIntegrationTest.java |  2 +-
 .../integration/SparkIntegrationTest24.java        | 11 +++++++
 .../integration/SparkIntegrationTest30.java        |  4 +--
 .../integration/SparkIntegrationTest31.java        |  4 +--
 9 files changed, 70 insertions(+), 41 deletions(-)
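With spark-3.1 and spark-scala-2.12 now marked activeByDefault, a plain build
targets Spark 3.1 / Scala 2.12 with no profile flags, while the older
combinations stay selectable. A minimal sketch (profile names and flags taken
from spark/pom.xml and the CI workflow in this commit):

    # default build: Spark 3.1 / Scala 2.12, no -P flags needed
    mvn clean package -DskipTests

    # older combination via explicit profiles, e.g. Spark 2.4 on Scala 2.11
    mvn clean package -DskipTests -Pspark-2.4 -Pspark-scala-2.11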
diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
index fdf28ff..81e0822 100644
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -216,7 +216,8 @@ jobs:
           mvn install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B
           mvn clean package -pl zeppelin-plugins -amd -DskipTests -B
       - name: run tests
-        run: mvn test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=ZeppelinSparkClusterTest24,SparkSubmitIntegrationTest,SparkIntegrationTest24,ZeppelinSparkClusterTest23,SparkIntegrationTest23,ZeppelinSparkClusterTest22,SparkIntegrationTest22,ZeppelinSparkClusterTest30,SparkIntegrationTest30 -DfailIfNoTests=false
+        run: mvn test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=ZeppelinSparkClusterTest24,SparkSubmitIntegrationTest,SparkIntegrationTest24,ZeppelinSparkClusterTest23,SparkIntegrationTest23,ZeppelinSparkClusterTest22,SparkIntegrationTest22,ZeppelinSparkClusterTest30,ZeppelinSparkClusterTest31,SparkIntegrationTest30,SparkIntegrationTest31 -DfailIfNoTests=false
+
   jdbcIntegrationTest-and-unit-test-of-Spark-2-4-with-Scala-2-11:
     runs-on: ubuntu-20.04
     steps:
@@ -258,7 +259,7 @@ jobs:
       - name: run tests
         run: mvn test -DskipRat -pl zeppelin-interpreter-integration,jdbc,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Pspark-2.4 -Pspark-scala-2.11 -Phadoop2 -Pintegration -B -Dtest=JdbcIntegrationTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false

-  spark-2-4-and-scale-2-12:
+  spark-2-4-and-scala-2-12:
     runs-on: ubuntu-20.04
     steps:
       - name: Checkout
@@ -295,7 +296,7 @@ jobs:
       - name: run tests
         run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.4 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false

-  spark-2-3-and-scale-2-11-and-other-interpreter:
+  spark-3-0-and-scala-2-12-and-other-interpreter:
     runs-on: ubuntu-20.04
     steps:
       - name: Checkout
@@ -328,11 +329,11 @@ jobs:
           R -e "IRkernel::installspec()"
       - name: install environment
         run: |
-          mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.3 -Pspark-scala-2.11 -Phadoop2 -B
+          mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.0 -Pspark-scala-2.12 -Phadoop2 -B
       - name: run tests
-        run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.3 -Pspark-scala-2.11 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
+        run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.0 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false

-  spark-2-2-and-scale-2-10-and-other-interpreter:
+  spark-3-1-and-scala-2-12-and-other-interpreter:
     runs-on: ubuntu-20.04
     steps:
       - name: Checkout
@@ -364,9 +365,9 @@ jobs:
         run: |
           R -e "IRkernel::installspec()"
       - name: install environment
-        run: mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.2 -Pspark-scala-2.10 -Phadoop2 -B
+        run: mvn install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B
       - name: run tests
-        run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.2 -Pspark-scala-2.10 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
+        run: mvn test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false
   test-livy-0-5-with-spark-2-2-0-under-python3:
     runs-on: ubuntu-20.04
     steps:
diff --git a/spark/interpreter/pom.xml b/spark/interpreter/pom.xml
index bef1699..81a0cdc 100644
--- a/spark/interpreter/pom.xml
+++ b/spark/interpreter/pom.xml
@@ -40,10 +40,6 @@
     <maven.aeither.provider.version>3.0.3</maven.aeither.provider.version>
     <wagon.version>2.7</wagon.version>

-    <datanucleus.rdbms.version>3.2.9</datanucleus.rdbms.version>
-    <datanucleus.apijdo.version>3.2.6</datanucleus.apijdo.version>
-    <datanucleus.core.version>3.2.10</datanucleus.core.version>
-
     <scala.compile.version>${spark.scala.version}</scala.compile.version>
     <!-- settings -->
     <pyspark.test.exclude>**/PySparkInterpreterMatplotlibTest.java</pyspark.test.exclude>
diff --git a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
index c613fcf..c750ea9 100644
--- a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
+++ b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkInterpreterTest.java
@@ -225,7 +225,7 @@ public class SparkInterpreterTest {
               "| 1| a|\n" +
               "| 2|null|\n" +
               "+---+----+"));
-    } else if (version.contains("String = 2.")) {
+    } else {
       // create dataset from case class
       context = getInterpreterContext();
       result = interpreter.interpret("case class Person(id:Int, name:String, age:Int, country:String)\n" +
@@ -252,8 +252,9 @@ public class SparkInterpreterTest {
     }

     // ZeppelinContext
-    result = interpreter.interpret("z.show(df)", getInterpreterContext());
-    assertEquals(InterpreterResult.Code.SUCCESS, result.code());
+    context = getInterpreterContext();
+    result = interpreter.interpret("z.show(df)", context);
+    assertEquals(context.out.toString(), InterpreterResult.Code.SUCCESS, result.code());
     assertEquals(InterpreterResult.Type.TABLE, messageOutput.getType());
     messageOutput.flush();
     assertEquals("_1\t_2\n1\ta\n2\tnull\n", messageOutput.toInterpreterResultMessage().getData());
diff --git a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkSqlInterpreterTest.java b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkSqlInterpreterTest.java
index 8c01130..1ce7329 100644
--- a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkSqlInterpreterTest.java
+++ b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkSqlInterpreterTest.java
@@ -67,21 +67,25 @@ public class SparkSqlInterpreterTest {
     intpGroup.get("session_1").add(sparkInterpreter);
     intpGroup.get("session_1").add(sqlInterpreter);

-    context = InterpreterContext.builder()
-        .setNoteId("noteId")
-        .setParagraphId("paragraphId")
-        .setParagraphTitle("title")
-        .setAngularObjectRegistry(new AngularObjectRegistry(intpGroup.getId(), null))
-        .setResourcePool(new LocalResourcePool("id"))
-        .setInterpreterOut(new InterpreterOutput())
-        .setIntpEventClient(mock(RemoteInterpreterEventClient.class))
-        .build();
+    context = getInterpreterContext();
     InterpreterContext.set(context);

     sparkInterpreter.open();
     sqlInterpreter.open();
   }

+  private static InterpreterContext getInterpreterContext() {
+    return InterpreterContext.builder()
+        .setNoteId("noteId")
+        .setParagraphId("paragraphId")
+        .setParagraphTitle("title")
+        .setAngularObjectRegistry(new AngularObjectRegistry(intpGroup.getId(), null))
+        .setResourcePool(new LocalResourcePool("id"))
+        .setInterpreterOut(new InterpreterOutput())
+        .setIntpEventClient(mock(RemoteInterpreterEventClient.class))
+        .build();
+  }
+
   @AfterClass
   public static void tearDown() throws InterpreterException {
     sqlInterpreter.close();
@@ -287,8 +291,9 @@

   @Test
   public void testDDL() throws InterpreterException, IOException {
+    InterpreterContext context = getInterpreterContext();
     InterpreterResult ret = sqlInterpreter.interpret("create table t1(id int, name string)", context);
-    assertEquals(InterpreterResult.Code.SUCCESS, ret.code());
+    assertEquals(context.out.toString(), InterpreterResult.Code.SUCCESS, ret.code());
     // spark 1.x will still return DataFrame with non-empty columns.
     // org.apache.spark.sql.DataFrame = [result: string]
     if (!sparkInterpreter.getSparkContext().version().startsWith("1.")) {
diff --git a/spark/pom.xml b/spark/pom.xml
index f123cad..75a4346 100644
--- a/spark/pom.xml
+++ b/spark/pom.xml
@@ -39,11 +39,11 @@
     <datanucleus.core.version>3.2.10</datanucleus.core.version>

    <!-- spark versions -->
-    <spark.version>2.4.5</spark.version>
+    <spark.version>3.1.2</spark.version>
     <protobuf.version>2.5.0</protobuf.version>
-    <py4j.version>0.10.7</py4j.version>
-    <spark.scala.version>2.11.12</spark.scala.version>
-    <spark.scala.binary.version>2.11</spark.scala.binary.version>
+    <py4j.version>0.10.9</py4j.version>
+    <spark.scala.version>2.12.7</spark.scala.version>
+    <spark.scala.binary.version>2.12</spark.scala.binary.version>

     <spark.archive>spark-${spark.version}</spark.archive>
     <spark.src.download.url>
@@ -141,6 +141,9 @@
     <profile>
       <id>spark-scala-2.12</id>
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
       <properties>
         <spark.scala.version>2.12.7</spark.scala.version>
         <spark.scala.binary.version>2.12</spark.scala.binary.version>
@@ -149,9 +152,6 @@

     <profile>
       <id>spark-scala-2.11</id>
-      <activation>
-        <activeByDefault>true</activeByDefault>
-      </activation>
       <properties>
         <spark.scala.version>2.11.12</spark.scala.version>
         <spark.scala.binary.version>2.11</spark.scala.binary.version>
@@ -169,9 +169,27 @@
     <!-- profile spark-x only affect the embedded spark version in zeppelin distribution -->
     <profile>
+      <id>spark-3.1</id>
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
+      <properties>
+        <datanucleus.core.version>4.1.17</datanucleus.core.version>
+        <datanucleus.rdbms.version>4.1.19</datanucleus.rdbms.version>
+        <datanucleus.apijdo.version>4.2.4</datanucleus.apijdo.version>
+        <spark.version>3.1.2</spark.version>
+        <protobuf.version>2.5.0</protobuf.version>
+        <py4j.version>0.10.9</py4j.version>
+      </properties>
+    </profile>
+
+    <profile>
       <id>spark-3.0</id>
       <properties>
-        <spark.version>3.1.1</spark.version>
+        <datanucleus.core.version>4.1.17</datanucleus.core.version>
+        <datanucleus.rdbms.version>4.1.19</datanucleus.rdbms.version>
+        <datanucleus.apijdo.version>4.2.4</datanucleus.apijdo.version>
+        <spark.version>3.0.3</spark.version>
         <protobuf.version>2.5.0</protobuf.version>
         <py4j.version>0.10.9</py4j.version>
       </properties>
@@ -179,9 +197,6 @@

     <profile>
       <id>spark-2.4</id>
-      <activation>
-        <activeByDefault>true</activeByDefault>
-      </activation>
       <properties>
         <spark.version>2.4.5</spark.version>
         <protobuf.version>2.5.0</protobuf.version>
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java
index 40496c7..1dec6ee 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java
@@ -58,7 +58,7 @@ public abstract class SparkIntegrationTest {
   private static MiniHadoopCluster hadoopCluster;
   private static MiniZeppelin zeppelin;
   private static InterpreterFactory interpreterFactory;
-  private static InterpreterSettingManager interpreterSettingManager;
+  protected static InterpreterSettingManager interpreterSettingManager;

   private String sparkVersion;
   private String sparkHome;
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java
index 48d3185..9a2391d 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java
@@ -17,9 +17,14 @@
 package org.apache.zeppelin.integration;

+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.zeppelin.interpreter.InterpreterException;
+import org.apache.zeppelin.interpreter.InterpreterSetting;
+import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;

+import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;

@@ -37,4 +42,10 @@ public class SparkIntegrationTest24 extends SparkIntegrationTest{
     });
   }

+  @Override
+  public void testYarnClusterMode() throws IOException, YarnException, InterruptedException, InterpreterException, XmlPullParserException {
+    InterpreterSetting sparkInterpreterSetting = interpreterSettingManager.getInterpreterSettingByName("spark");
+    sparkInterpreterSetting.setProperty("spark.sql.execution.arrow.sparkr.enabled", "false");
+    super.testYarnClusterMode();
+  }
 }
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest30.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest30.java
index 76cd0c9..9dedf93 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest30.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest30.java
@@ -34,8 +34,8 @@ public class SparkIntegrationTest30 extends SparkIntegrationTest {
   @Parameterized.Parameters
   public static List<Object[]> data() {
     return Arrays.asList(new Object[][]{
-        {"3.0.2", "2.7"},
-        {"3.0.2", "3.2"}
+        {"3.0.3", "2.7"},
+        {"3.0.3", "3.2"}
     });
   }
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest31.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest31.java
index 26362ba..bfaa1ea 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest31.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest31.java
@@ -34,8 +34,8 @@ public class SparkIntegrationTest31 extends SparkIntegrationTest {
   @Parameterized.Parameters
   public static List<Object[]> data() {
     return Arrays.asList(new Object[][]{
-        {"3.1.1", "2.7"},
-        {"3.1.1", "3.2"}
+        {"3.1.2", "2.7"},
+        {"3.1.2", "3.2"}
     });
   }