This is an automated email from the ASF dual-hosted git repository. jongyoul pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push: new 0a115e1b22 [ZEPPELIN-5779] Remove pig interpreter (#4424) 0a115e1b22 is described below commit 0a115e1b22668cef704d12494b85708fd681ae4b Author: PARK HYE WON <38900338+hyee...@users.noreply.github.com> AuthorDate: Tue Jul 26 15:30:11 2022 +0900 [ZEPPELIN-5779] Remove pig interpreter (#4424) * [ZEPPELIN-5779] Remove pig interpreter * Automatically updated after build --- .github/workflows/core.yml | 2 +- .github/workflows/frontend.yml | 2 +- Dockerfile | 2 +- bin/interpreter.sh | 21 -- conf/interpreter-list | 1 - dev/create_release.sh | 2 +- docs/_includes/themes/zeppelin/_navigation.html | 1 - .../themes/zeppelin/img/pig_zeppelin_tutorial.png | Bin 280450 -> 0 bytes docs/index.md | 1 - docs/interpreter/pig.md | 190 ------------ docs/usage/interpreter/installation.md | 5 - docs/usage/other_features/zeppelin_context.md | 2 +- .../Using Pig for querying data_2C57UKYWR.zpln | 334 --------------------- pig/pom.xml | 171 ----------- .../apache/zeppelin/pig/BasePigInterpreter.java | 124 -------- .../org/apache/zeppelin/pig/PigInterpreter.java | 155 ---------- .../apache/zeppelin/pig/PigQueryInterpreter.java | 162 ---------- .../org/apache/zeppelin/pig/PigScriptListener.java | 92 ------ .../java/org/apache/zeppelin/pig/PigUtils.java | 50 --- pig/src/main/resources/interpreter-setting.json | 59 ---- .../zeppelin/pig/PigInterpreterSparkTest.java | 149 --------- .../apache/zeppelin/pig/PigInterpreterTest.java | 154 ---------- .../apache/zeppelin/pig/PigInterpreterTezTest.java | 157 ---------- .../zeppelin/pig/PigQueryInterpreterTest.java | 162 ---------- pig/src/test/resources/core-site.xml | 3 - pig/src/test/resources/log4j.properties | 22 -- pom.xml | 1 - zeppelin-distribution/src/bin_license/LICENSE | 1 - zeppelin-web/karma.conf.js | 1 + 29 files changed, 6 insertions(+), 2020 deletions(-) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index 3f0c88ec2d..4f7b609f89 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -82,7 +82,7 @@ jobs: interpreter-test-non-core: runs-on: ubuntu-20.04 env: - INTERPRETERS: 'beam,hbase,pig,jdbc,file,flink-cmd,ignite,cassandra,elasticsearch,bigquery,alluxio,livy,groovy,sap,java,geode,neo4j,submarine,sparql,mongodb,influxdb,ksql' + INTERPRETERS: 'beam,hbase,jdbc,file,flink-cmd,ignite,cassandra,elasticsearch,bigquery,alluxio,livy,groovy,sap,java,geode,neo4j,submarine,sparql,mongodb,influxdb,ksql' steps: - name: Checkout uses: actions/checkout@v2 diff --git a/.github/workflows/frontend.yml b/.github/workflows/frontend.yml index c1c637a03f..defee2faa5 100644 --- a/.github/workflows/frontend.yml +++ b/.github/workflows/frontend.yml @@ -19,7 +19,7 @@ env: SPARK_PRINT_LAUNCH_COMMAND: "true" SPARK_LOCAL_IP: 127.0.0.1 ZEPPELIN_LOCAL_IP: 127.0.0.1 - INTERPRETERS: '!beam,!hbase,!pig,!jdbc,!file,!flink,!ignite,!cassandra,!elasticsearch,!bigquery,!alluxio,!livy,!groovy,!sap,!java,!geode,!neo4j,!submarine,!sparql,!mongodb' + INTERPRETERS: '!beam,!hbase,!jdbc,!file,!flink,!ignite,!cassandra,!elasticsearch,!bigquery,!alluxio,!livy,!groovy,!sap,!java,!geode,!neo4j,!submarine,!sparql,!mongodb' jobs: run-e2e-tests-in-zeppelin-web: diff --git a/Dockerfile b/Dockerfile index 2aef45aa86..63c5ee80f2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,7 +23,7 @@ RUN echo "unsafe-perm=true" > ~/.npmrc && \ echo '{ "allow_root": true }' > ~/.bowerrc && \ ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.2 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular && \ # Example with doesn't compile all interpreters - # ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.2 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -pl '!groovy,!submarine,!livy,!hbase,!pig,!file,!flink,!ignite' && \ + # ./mvnw -B package -DskipTests -Pbuild-distr -Pspark-3.2 -Pinclude-hadoop -Phadoop3 -Pspark-scala-2.12 -Pweb-angular -pl '!groovy,!submarine,!livy,!hbase,!file,!flink,!ignite' && \ mv /workspace/zeppelin/zeppelin-distribution/target/zeppelin-*/zeppelin-* /opt/zeppelin/ && \ # Removing stuff saves time, because docker creates a temporary layer rm -rf ~/.m2 && \ diff --git a/bin/interpreter.sh b/bin/interpreter.sh index 1c724412e4..92dc091e8f 100755 --- a/bin/interpreter.sh +++ b/bin/interpreter.sh @@ -205,28 +205,7 @@ elif [[ "${INTERPRETER_ID}" == "hbase" ]]; then else echo "HBASE_HOME and HBASE_CONF_DIR are not set, configuration might not be loaded" fi -elif [[ "${INTERPRETER_ID}" == "pig" ]]; then - # autodetect HADOOP_CONF_HOME by heuristic - if [[ -n "${HADOOP_HOME}" ]] && [[ -z "${HADOOP_CONF_DIR}" ]]; then - if [[ -d "${HADOOP_HOME}/etc/hadoop" ]]; then - export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop" - elif [[ -d "/etc/hadoop/conf" ]]; then - export HADOOP_CONF_DIR="/etc/hadoop/conf" - fi - fi - - if [[ -n "${HADOOP_CONF_DIR}" ]] && [[ -d "${HADOOP_CONF_DIR}" ]]; then - ZEPPELIN_INTP_CLASSPATH+=":${HADOOP_CONF_DIR}" - fi - # autodetect TEZ_CONF_DIR - if [[ -n "${TEZ_CONF_DIR}" ]]; then - ZEPPELIN_INTP_CLASSPATH+=":${TEZ_CONF_DIR}" - elif [[ -d "/etc/tez/conf" ]]; then - ZEPPELIN_INTP_CLASSPATH+=":/etc/tez/conf" - else - echo "TEZ_CONF_DIR is not set, configuration might not be loaded" - fi elif [[ "${INTERPRETER_ID}" == "flink" ]]; then addEachJarInDirRecursiveForIntp "${FLINK_HOME}/lib" diff --git a/conf/interpreter-list b/conf/interpreter-list index dbdac74400..3be3396aee 100644 --- a/conf/interpreter-list +++ b/conf/interpreter-list @@ -35,7 +35,6 @@ kotlin org.apache.zeppelin:zeppelin-kotlin:0.10.0 Kotlin livy org.apache.zeppelin:zeppelin-livy:0.10.0 Livy interpreter md org.apache.zeppelin:zeppelin-markdown:0.10.0 Markdown support neo4j org.apache.zeppelin:zeppelin-neo4j:0.10.0 Neo4j interpreter -pig org.apache.zeppelin:zeppelin-pig:0.10.0 Pig interpreter python org.apache.zeppelin:zeppelin-python:0.10.0 Python interpreter sap org.apache.zeppelin:zeppelin-sap:0.10.0 SAP Support shell org.apache.zeppelin:zeppelin-shell:0.10.0 Shell command diff --git a/dev/create_release.sh b/dev/create_release.sh index d35ff95104..0fa3b50de7 100755 --- a/dev/create_release.sh +++ b/dev/create_release.sh @@ -98,7 +98,7 @@ function make_binary_release() { git_clone make_source_package -make_binary_release netinst "-Pweb-angular -Phadoop-2.6 -pl !beam,!hbase,!pig,!jdbc,!file,!flink,!ignite,!cassandra,!elasticsearch,!bigquery,!alluxio,!livy,!groovy,!sap,!java,!geode,!neo4j,!submarine,!sparql,!mongodb,!ksql -am" +make_binary_release netinst "-Pweb-angular -Phadoop-2.6 -pl !beam,!hbase,!jdbc,!file,!flink,!ignite,!cassandra,!elasticsearch,!bigquery,!alluxio,!livy,!groovy,!sap,!java,!geode,!neo4j,!submarine,!sparql,!mongodb,!ksql -am" make_binary_release all "-Pweb-angular -Phadoop-2.6" # remove non release files and dirs diff --git a/docs/_includes/themes/zeppelin/_navigation.html b/docs/_includes/themes/zeppelin/_navigation.html index 1c93e0d764..1cc4ba02ad 100644 --- a/docs/_includes/themes/zeppelin/_navigation.html +++ b/docs/_includes/themes/zeppelin/_navigation.html @@ -159,7 +159,6 @@ <li><a href="{{BASE_PATH}}/interpreter/markdown.html">Markdown</a></li> <li><a href="{{BASE_PATH}}/interpreter/mongodb.html">MongoDB</a></li> <li><a href="{{BASE_PATH}}/interpreter/neo4j.html">Neo4j</a></li> - <li><a href="{{BASE_PATH}}/interpreter/pig.html">Pig</a></li> <li><a href="{{BASE_PATH}}/interpreter/postgresql.html">Postgresql, HAWQ</a></li> <li><a href="{{BASE_PATH}}/interpreter/sap.html">SAP</a></li> <li><a href="{{BASE_PATH}}/interpreter/shell.html">Shell</a></li> diff --git a/docs/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png b/docs/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png deleted file mode 100644 index b90b982e1d..0000000000 Binary files a/docs/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png and /dev/null differ diff --git a/docs/index.md b/docs/index.md index e926aa7f48..e5a068530f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -157,7 +157,6 @@ limitations under the License. * [Markdown](./interpreter/markdown.html) * [MongoDB](./interpreter/mongodb.html) * [Neo4j](./interpreter/neo4j.html) - * [Pig](./interpreter/pig.html) * [Postgresql, HAWQ](./interpreter/postgresql.html) * [Python](./interpreter/python.html) * [R](./interpreter/r.html) diff --git a/docs/interpreter/pig.md b/docs/interpreter/pig.md deleted file mode 100644 index e640b34852..0000000000 --- a/docs/interpreter/pig.md +++ /dev/null @@ -1,190 +0,0 @@ ---- -layout: page -title: "Pig Interpreter for Apache Zeppelin" -description: "Apache Pig is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs." -group: manual ---- -<!-- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. ---> -{% include JB/setup %} - - -# Pig Interpreter for Apache Zeppelin - -<div id="toc"></div> - -## Overview -[Apache Pig](https://pig.apache.org/) is a platform for analyzing large data sets that consists of -a high-level language for expressing data analysis programs, -coupled with infrastructure for evaluating these programs. -The salient property of Pig programs is that their structure is amenable to substantial parallelization, -which in turns enables them to handle very large data sets. - -## Supported interpreter type - - `%pig.script` (default Pig interpreter, so you can use `%pig`) - - `%pig.script` is like the Pig grunt shell. Anything you can run in Pig grunt shell can be run in `%pig.script` interpreter, it is used for running Pig script where you don’t need to visualize the data, it is suitable for data munging. - - - `%pig.query` - - `%pig.query` is a little different compared with `%pig.script`. It is used for exploratory data analysis via Pig latin where you can leverage Zeppelin’s visualization ability. There're 2 minor differences in the last statement between `%pig.script` and `%pig.query` - - No pig alias in the last statement in `%pig.query` (read the examples below). - - The last statement must be in single line in `%pig.query` - - -## How to use - -### How to setup Pig execution modes. - -- Local Mode - - Set `zeppelin.pig.execType` as `local`. - -- MapReduce Mode - - Set `zeppelin.pig.execType` as `mapreduce`. HADOOP\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`. - -- Tez Local Mode - - Only Tez 0.7 is supported. Set `zeppelin.pig.execType` as `tez_local`. - -- Tez Mode - - Only Tez 0.7 is supported. Set `zeppelin.pig.execType` as `tez`. HADOOP\_CONF\_DIR and TEZ\_CONF\_DIR needs to be specified in `ZEPPELIN_HOME/conf/zeppelin-env.sh`. - -- Spark Local Mode - - Only Spark 1.6.x is supported, by default it is Spark 1.6.3. Set `zeppelin.pig.execType` as `spark_local`. - -- Spark Mode - - Only Spark 1.6.x is supported, by default it is Spark 1.6.3. Set `zeppelin.pig.execType` as `spark`. For now, only yarn-client mode is supported. To enable it, you need to set property `SPARK_MASTER` to yarn-client and set `SPARK_JAR` to the spark assembly jar. - -### How to choose custom Spark Version - -By default, Pig Interpreter would use Spark 1.6.3 built with scala 2.10, if you want to use another spark version or scala version, -you need to rebuild Zeppelin by specifying the custom Spark version via -Dpig.spark.version=<custom_spark_version> and scala version via -Dpig.scala.version=<scala_version> in the maven build command. - -### How to configure interpreter - -At the Interpreters menu, you have to create a new Pig interpreter. Pig interpreter has below properties by default. -And you can set any Pig properties here which will be passed to Pig engine. (like tez.queue.name & mapred.job.queue.name). -Besides, we use paragraph title as job name if it exists, else use the last line of Pig script. -So you can use that to find app running in YARN RM UI. - -<table class="table-configuration"> - <tr> - <th>Property</th> - <th>Default</th> - <th>Description</th> - </tr> - <tr> - <td>zeppelin.pig.execType</td> - <td>mapreduce</td> - <td>Execution mode for pig runtime. local | mapreduce | tez_local | tez | spark_local | spark </td> - </tr> - <tr> - <td>zeppelin.pig.includeJobStats</td> - <td>false</td> - <td>whether display jobStats info in <code>%pig.script</code></td> - </tr> - <tr> - <td>zeppelin.pig.maxResult</td> - <td>1000</td> - <td>max row number displayed in <code>%pig.query</code></td> - </tr> - <tr> - <td>tez.queue.name</td> - <td>default</td> - <td>queue name for tez engine</td> - </tr> - <tr> - <td>mapred.job.queue.name</td> - <td>default</td> - <td>queue name for mapreduce engine</td> - </tr> - <tr> - <td>SPARK_MASTER</td> - <td>local</td> - <td>local | yarn-client</td> - </tr> - <tr> - <td>SPARK_JAR</td> - <td></td> - <td>The spark assembly jar, both jar in local or hdfs is supported. Put it on hdfs could have - performance benefit</td> - </tr> -</table> - -### Example - -##### pig - -``` -%pig - -bankText = load 'bank.csv' using PigStorage(';'); -bank = foreach bankText generate $0 as age, $1 as job, $2 as marital, $3 as education, $5 as balance; -bank = filter bank by age != '"age"'; -bank = foreach bank generate (int)age, REPLACE(job,'"','') as job, REPLACE(marital, '"', '') as marital, (int)(REPLACE(balance, '"', '')) as balance; -store bank into 'clean_bank.csv' using PigStorage(';'); -- this statement is optional, it just show you that most of time %pig.script is used for data munging before querying the data. -``` - -##### pig.query - -Get the number of each age where age is less than 30 - -``` -%pig.query - -bank_data = filter bank by age < 30; -b = group bank_data by age; -foreach b generate group, COUNT($1); -``` - -The same as above, but use dynamic text form so that use can specify the variable maxAge in textbox. -(See screenshot below). Dynamic form is a very cool feature of Zeppelin, you can refer this [link]((../usage/dynamic_form/intro.html)) for details. - -``` -%pig.query - -bank_data = filter bank by age < ${maxAge=40}; -b = group bank_data by age; -foreach b generate group, COUNT($1) as count; -``` - -Get the number of each age for specific marital type, -also use dynamic form here. User can choose the marital type in the dropdown list (see screenshot below). - -``` -%pig.query - -bank_data = filter bank by marital=='${marital=single,single|divorced|married}'; -b = group bank_data by age; -foreach b generate group, COUNT($1) as count; -``` - -The above examples are in the Pig tutorial note in Zeppelin, you can check that for details. Here's the screenshot. - -<img class="img-responsive" width="1024px" style="margin:0 auto; padding: 26px;" src="{{BASE_PATH}}/assets/themes/zeppelin/img/pig_zeppelin_tutorial.png" /> - - -Data is shared between `%pig` and `%pig.query`, so that you can do some common work in `%pig`, -and do different kinds of query based on the data of `%pig`. -Besides, we recommend you to specify alias explicitly so that the visualization can display -the column name correctly. In the above example 2 and 3 of `%pig.query`, we name `COUNT($1)` as `count`. -If you don't do this, then we will name it using position. -E.g. in the above first example of `%pig.query`, we will use `col_1` in chart to represent `COUNT($1)`. - - diff --git a/docs/usage/interpreter/installation.md b/docs/usage/interpreter/installation.md index a9705fee22..36b192f813 100644 --- a/docs/usage/interpreter/installation.md +++ b/docs/usage/interpreter/installation.md @@ -197,11 +197,6 @@ You can also find the below community managed interpreter list in `conf/interpre <td>org.apache.zeppelin:zeppelin-neo4j:0.10.0</td> <td>Neo4j interpreter</td> </tr> - <tr> - <td>pig</td> - <td>org.apache.zeppelin:zeppelin-pig:0.10.0</td> - <td>Pig interpreter</td> - </tr> <tr> <td>python</td> <td>org.apache.zeppelin:zeppelin-python:0.10.0</td> diff --git a/docs/usage/other_features/zeppelin_context.md b/docs/usage/other_features/zeppelin_context.md index 784e1f34b8..229d0a9b9c 100644 --- a/docs/usage/other_features/zeppelin_context.md +++ b/docs/usage/other_features/zeppelin_context.md @@ -235,7 +235,7 @@ dynamic-forms and object-interpolation as described below. | Interpreters that use Embedded Commands | |-------------------------------------------------------------------| -|spark.sql (\*), bigquery, cassandra, elasticsearch, file, hbase, ignite, jdbc (\*), livy, markdown, neo4j, pig, python, shell (\*), zengine | +|spark.sql (\*), bigquery, cassandra, elasticsearch, file, hbase, ignite, jdbc (\*), livy, markdown, neo4j, python, shell (\*), zengine | Dynamic forms are available in all of the interpreters in the table above, but object interpolation is only available in a small, but growing, list of interpreters diff --git a/notebook/Miscellaneous Tutorial/Using Pig for querying data_2C57UKYWR.zpln b/notebook/Miscellaneous Tutorial/Using Pig for querying data_2C57UKYWR.zpln deleted file mode 100644 index 04cb00852a..0000000000 --- a/notebook/Miscellaneous Tutorial/Using Pig for querying data_2C57UKYWR.zpln +++ /dev/null @@ -1,334 +0,0 @@ -{ - "paragraphs": [ - { - "text": "%md\n\n\n### [Apache Pig](http://pig.apache.org/) is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs. The salient property of Pig programs is that their structure is amenable to substantial parallelization, which in turns enables them to handle very large data sets.\n\nPig\u0027s language layer currently consists of a textual language called Pig [...] - "user": "anonymous", - "dateUpdated": "Jan 22, 2017 12:48:50 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "markdown", - "editOnDblClick": true - }, - "editorMode": "ace/mode/markdown", - "editorHide": true, - "tableHide": false - }, - "settings": { - "params": {}, - "forms": {} - }, - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "HTML", - "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003ch3\u003e\u003ca href\u003d\"http://pig.apache.org/\"\u003eApache Pig\u003c/a\u003e is a platform for analyzing large data sets that consists of a high-level language for expressing data analysis programs, coupled with infrastructure for evaluating these programs. The salient property of Pig programs is that their structure is amenable to substantial parallelization, which in turns enables them to handle very large d [...] - } - ] - }, - "apps": [], - "jobName": "paragraph_1483277502513_1156234051", - "id": "20170101-213142_1565013608", - "dateCreated": "Jan 1, 2017 9:31:42 PM", - "dateStarted": "Jan 22, 2017 12:48:50 PM", - "dateFinished": "Jan 22, 2017 12:48:51 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%md\n\nThis pig tutorial use pig to do the same thing as spark tutorial. The default mode is mapreduce, you can also use other modes like local/tez_local/tez. For mapreduce mode, you need to have hadoop installed and export `HADOOP_CONF_DIR` in `zeppelin-env.sh`\n\nThe tutorial consists of 3 steps.\n\n* Use shell interpreter to download bank.csv and upload it to hdfs\n* use `%pig` to process the data\n* use `%pig.query` to query the data", - "user": "anonymous", - "dateUpdated": "Jan 22, 2017 12:48:55 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "markdown", - "editOnDblClick": true - }, - "editorMode": "ace/mode/markdown", - "editorHide": true, - "tableHide": false - }, - "settings": { - "params": {}, - "forms": {} - }, - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "HTML", - "data": "\u003cdiv class\u003d\"markdown-body\"\u003e\n\u003cp\u003eThis pig tutorial use pig to do the same thing as spark tutorial. The default mode is mapreduce, you can also use other modes like local/tez_local/tez. For mapreduce mode, you need to have hadoop installed and export \u003ccode\u003eHADOOP_CONF_DIR\u003c/code\u003e in \u003ccode\u003ezeppelin-env.sh\u003c/code\u003e\u003c/p\u003e\n\u003cp\u003eThe tutorial consists of 3 steps.\u003c/p\u003e\n\u003cul\u003e\n [...] - } - ] - }, - "apps": [], - "jobName": "paragraph_1483689316217_-629483391", - "id": "20170106-155516_1050601059", - "dateCreated": "Jan 6, 2017 3:55:16 PM", - "dateStarted": "Jan 22, 2017 12:48:55 PM", - "dateFinished": "Jan 22, 2017 12:48:55 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%sh\n\nwget https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv\nhadoop fs -put bank.csv .\n", - "user": "anonymous", - "dateUpdated": "Jan 22, 2017 12:51:48 PM", - "config": { - "colWidth": 12.0, - "enabled": true, - "results": {}, - "editorSetting": { - "language": "text", - "editOnDblClick": false - }, - "editorMode": "ace/mode/text" - }, - "settings": { - "params": {}, - "forms": {} - }, - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TEXT", - "data": "--2017-01-22 12:51:48-- https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv\nResolving s3.amazonaws.com... 52.216.80.227\nConnecting to s3.amazonaws.com|52.216.80.227|:443... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 461474 (451K) [application/octet-stream]\nSaving to: \u0027bank.csv.3\u0027\n\n 0K .......... .......... .......... .......... .......... 11% 141K 3s\n 50K .......... .......... .......... .......... ....... [...] - } - ] - }, - "apps": [], - "jobName": "paragraph_1485058437578_-1906301827", - "id": "20170122-121357_640055590", - "dateCreated": "Jan 22, 2017 12:13:57 PM", - "dateStarted": "Jan 22, 2017 12:51:48 PM", - "dateFinished": "Jan 22, 2017 12:51:52 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%pig\n\nbankText \u003d load \u0027bank.csv\u0027 using PigStorage(\u0027;\u0027);\nbank \u003d foreach bankText generate $0 as age, $1 as job, $2 as marital, $3 as education, $5 as balance; \nbank \u003d filter bank by age !\u003d \u0027\"age\"\u0027;\nbank \u003d foreach bank generate (int)age, REPLACE(job,\u0027\"\u0027,\u0027\u0027) as job, REPLACE(marital, \u0027\"\u0027, \u0027\u0027) as marital, (int)(REPLACE(balance, \u0027\"\u0027, \u0027\u0027)) as balance;\n\n-- [...] - "user": "anonymous", - "dateUpdated": "Feb 24, 2017 5:08:08 PM", - "config": { - "colWidth": 12.0, - "editorMode": "ace/mode/pig", - "results": {}, - "enabled": true, - "editorSetting": { - "language": "pig", - "editOnDblClick": false - } - }, - "settings": { - "params": {}, - "forms": {} - }, - "results": { - "code": "SUCCESS", - "msg": [] - }, - "apps": [], - "jobName": "paragraph_1483277250237_-466604517", - "id": "20161228-140640_1560978333", - "dateCreated": "Jan 1, 2017 9:27:30 PM", - "dateStarted": "Feb 24, 2017 5:08:08 PM", - "dateFinished": "Feb 24, 2017 5:08:11 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%pig.query\n\nbank_data \u003d filter bank by age \u003c 30;\nb \u003d group bank_data by age;\nforeach b generate group, COUNT($1);\n\n", - "user": "anonymous", - "dateUpdated": "Feb 24, 2017 5:08:13 PM", - "config": { - "colWidth": 4.0, - "editorMode": "ace/mode/pig", - "results": { - "0": { - "graph": { - "mode": "multiBarChart", - "height": 300.0, - "optionOpen": false - }, - "helium": {} - } - }, - "enabled": true, - "editorSetting": { - "language": "pig", - "editOnDblClick": false - } - }, - "settings": { - "params": {}, - "forms": {} - }, - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TABLE", - "data": "group\tcol_1\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n" - } - ] - }, - "apps": [], - "jobName": "paragraph_1483277250238_-465450270", - "id": "20161228-140730_1903342877", - "dateCreated": "Jan 1, 2017 9:27:30 PM", - "dateStarted": "Feb 24, 2017 5:08:13 PM", - "dateFinished": "Feb 24, 2017 5:08:26 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%pig.query\n\nbank_data \u003d filter bank by age \u003c ${maxAge\u003d40};\nb \u003d group bank_data by age;\nforeach b generate group, COUNT($1) as count;", - "user": "anonymous", - "dateUpdated": "Feb 24, 2017 5:08:14 PM", - "config": { - "colWidth": 4.0, - "editorMode": "ace/mode/pig", - "results": { - "0": { - "graph": { - "mode": "pieChart", - "height": 300.0, - "optionOpen": false - }, - "helium": {} - } - }, - "enabled": true, - "editorSetting": { - "language": "pig", - "editOnDblClick": false - } - }, - "settings": { - "params": { - "maxAge": "36" - }, - "forms": { - "maxAge": { - "name": "maxAge", - "defaultValue": "40", - "hidden": false - } - } - }, - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TABLE", - "data": "group\tcount\n19\t4\n20\t3\n21\t7\n22\t9\n23\t20\n24\t24\n25\t44\n26\t77\n27\t94\n28\t103\n29\t97\n30\t150\n31\t199\n32\t224\n33\t186\n34\t231\n35\t180\n" - } - ] - }, - "apps": [], - "jobName": "paragraph_1483277250239_-465835019", - "id": "20161228-154918_1551591203", - "dateCreated": "Jan 1, 2017 9:27:30 PM", - "dateStarted": "Feb 24, 2017 5:08:14 PM", - "dateFinished": "Feb 24, 2017 5:08:29 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%pig.query\n\nbank_data \u003d filter bank by marital\u003d\u003d\u0027${marital\u003dsingle,single|divorced|married}\u0027;\nb \u003d group bank_data by age;\nforeach b generate group, COUNT($1) as count;\n\n\n", - "user": "anonymous", - "dateUpdated": "Feb 24, 2017 5:08:15 PM", - "config": { - "colWidth": 4.0, - "editorMode": "ace/mode/pig", - "results": { - "0": { - "graph": { - "mode": "scatterChart", - "height": 300.0, - "optionOpen": false - }, - "helium": {} - } - }, - "enabled": true, - "editorSetting": { - "language": "pig", - "editOnDblClick": false - } - }, - "settings": { - "params": { - "marital": "married" - }, - "forms": { - "marital": { - "name": "marital", - "defaultValue": "single", - "options": [ - { - "value": "single" - }, - { - "value": "divorced" - }, - { - "value": "married" - } - ], - "hidden": false - } - } - }, - "results": { - "code": "SUCCESS", - "msg": [ - { - "type": "TABLE", - "data": "group\tcount\n23\t3\n24\t11\n25\t11\n26\t18\n27\t26\n28\t23\n29\t37\n30\t56\n31\t104\n32\t105\n33\t103\n34\t142\n35\t109\n36\t117\n37\t100\n38\t99\n39\t88\n40\t105\n41\t97\n42\t91\n43\t79\n44\t68\n45\t76\n46\t82\n47\t78\n48\t91\n49\t87\n50\t74\n51\t63\n52\t66\n53\t75\n54\t56\n55\t68\n56\t50\n57\t78\n58\t67\n59\t56\n60\t36\n61\t15\n62\t5\n63\t7\n64\t6\n65\t4\n66\t7\n67\t5\n68\t1\n69\t5\n70\t5\n71\t5\n72\t4\n73\t6\n74\t2\n75\t3\n76\t1\n77\t5\n78\t2\n79\t3\n80\t6\n81\t1 [...] - } - ] - }, - "apps": [], - "jobName": "paragraph_1483277250240_-480070728", - "id": "20161228-142259_575675591", - "dateCreated": "Jan 1, 2017 9:27:30 PM", - "dateStarted": "Feb 24, 2017 5:08:27 PM", - "dateFinished": "Feb 24, 2017 5:08:31 PM", - "status": "FINISHED", - "progressUpdateIntervalMs": 500 - }, - { - "text": "%pig\n", - "dateUpdated": "Jan 1, 2017 9:27:30 PM", - "config": {}, - "settings": { - "params": {}, - "forms": {} - }, - "apps": [], - "jobName": "paragraph_1483277250240_-480070728", - "id": "20161228-155036_1854903164", - "dateCreated": "Jan 1, 2017 9:27:30 PM", - "status": "READY", - "errorMessage": "", - "progressUpdateIntervalMs": 500 - } - ], - "name": "Using Pig for querying data", - "id": "2C57UKYWR", - "angularObjects": { - "2C3RWCVAG:shared_process": [], - "2C9KGCHDE:shared_process": [], - "2C8X2BS16:shared_process": [] - }, - "config": {}, - "info": {} -} \ No newline at end of file diff --git a/pig/pom.xml b/pig/pom.xml deleted file mode 100644 index 2170406ccd..0000000000 --- a/pig/pom.xml +++ /dev/null @@ -1,171 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - ~ Licensed to the Apache Software Foundation (ASF) under one or more - ~ contributor license agreements. See the NOTICE file distributed with - ~ this work for additional information regarding copyright ownership. - ~ The ASF licenses this file to You under the Apache License, Version 2.0 - ~ (the "License"); you may not use this file except in compliance with - ~ the License. You may obtain a copy of the License at - ~ - ~ http://www.apache.org/licenses/LICENSE-2.0 - ~ - ~ Unless required by applicable law or agreed to in writing, software - ~ distributed under the License is distributed on an "AS IS" BASIS, - ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ~ See the License for the specific language governing permissions and - ~ limitations under the License. - --> - -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <parent> - <artifactId>zeppelin-interpreter-parent</artifactId> - <groupId>org.apache.zeppelin</groupId> - <version>0.11.0-SNAPSHOT</version> - <relativePath>../zeppelin-interpreter-parent/pom.xml</relativePath> - </parent> - - <artifactId>zeppelin-pig</artifactId> - <packaging>jar</packaging> - <name>Zeppelin: Apache Pig Interpreter</name> - <description>Zeppelin interpreter for Apache Pig</description> - <url>https://zeppelin.apache.org</url> - - <properties> - <interpreter.name>pig</interpreter.name> - <pig.version>0.17.0</pig.version> - <hadoop.version>${hadoop2.6.version}</hadoop.version> - <tez.version>0.7.0</tez.version> - <pig.spark.version>1.6.3</pig.spark.version> - <pig.scala.version>2.10</pig.scala.version> - </properties> - - <dependencies> - <dependency> - <groupId>org.apache.pig</groupId> - <artifactId>pig</artifactId> - <version>${pig.version}</version> - <exclusions> - <exclusion> - <groupId>javax.servlet</groupId> - <artifactId>servlet-api</artifactId> - </exclusion> - <exclusion> - <groupId>org.mortbay.jetty</groupId> - <artifactId>servlet-api</artifactId> - </exclusion> - <exclusion> - <groupId>org.mortbay.jetty</groupId> - <artifactId>servlet-api-2.5</artifactId> - </exclusion> - </exclusions> - </dependency> - - <dependency> - <groupId>org.python</groupId> - <artifactId>jython-standalone</artifactId> - <version>2.7.0</version> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <version>${hadoop.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.tez</groupId> - <artifactId>tez-api</artifactId> - <version>${tez.version}</version> - <exclusions> - <exclusion> - <groupId>javax.servlet</groupId> - <artifactId>servlet-api</artifactId> - </exclusion> - </exclusions> - </dependency> - - <dependency> - <groupId>org.apache.tez</groupId> - <artifactId>tez-common</artifactId> - <version>${tez.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.tez</groupId> - <artifactId>tez-dag</artifactId> - <version>${tez.version}</version> - <exclusions> - <exclusion> - <groupId>javax.servlet</groupId> - <artifactId>servlet-api</artifactId> - </exclusion> - </exclusions> - </dependency> - - <dependency> - <groupId>org.apache.tez</groupId> - <artifactId>tez-runtime-library</artifactId> - <version>${tez.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.tez</groupId> - <artifactId>tez-runtime-internals</artifactId> - <version>${tez.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.tez</groupId> - <artifactId>tez-mapreduce</artifactId> - <version>${tez.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.tez</groupId> - <artifactId>tez-yarn-timeline-history-with-acls</artifactId> - <version>${tez.version}</version> - </dependency> - - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-core_${pig.scala.version}</artifactId> - <version>${pig.spark.version}</version> - </dependency> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-yarn_${pig.scala.version}</artifactId> - <version>${pig.spark.version}</version> - </dependency> - - </dependencies> - - <build> - <plugins> - <plugin> - <artifactId>maven-enforcer-plugin</artifactId> - </plugin> - <plugin> - <artifactId>maven-resources-plugin</artifactId> - </plugin> - <plugin> - <artifactId>maven-shade-plugin</artifactId> - </plugin> - <plugin> - <artifactId>maven-surefire-plugin</artifactId> - <configuration> - <forkCount>1</forkCount> - <reuseForks>false</reuseForks> - </configuration> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-checkstyle-plugin</artifactId> - <configuration> - <skip>false</skip> - </configuration> - </plugin> - </plugins> - </build> -</project> diff --git a/pig/src/main/java/org/apache/zeppelin/pig/BasePigInterpreter.java b/pig/src/main/java/org/apache/zeppelin/pig/BasePigInterpreter.java deleted file mode 100644 index bad673ce1d..0000000000 --- a/pig/src/main/java/org/apache/zeppelin/pig/BasePigInterpreter.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.zeppelin.pig; - -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.pig.PigServer; -import org.apache.pig.backend.BackendException; -import org.apache.pig.backend.hadoop.executionengine.HExecutionEngine; -import org.apache.pig.backend.hadoop.executionengine.Launcher; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.lang.reflect.Field; -import java.util.Properties; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; - -import org.apache.zeppelin.interpreter.Interpreter; -import org.apache.zeppelin.interpreter.InterpreterContext; -import org.apache.zeppelin.scheduler.Scheduler; -import org.apache.zeppelin.scheduler.SchedulerFactory; - -/** - * - */ -public abstract class BasePigInterpreter extends Interpreter { - private static final Logger LOGGER = LoggerFactory.getLogger(BasePigInterpreter.class); - - protected ConcurrentHashMap<String, PigScriptListener> listenerMap = new ConcurrentHashMap<>(); - - public BasePigInterpreter(Properties property) { - super(property); - } - - @Override - public void cancel(InterpreterContext context) { - LOGGER.info("Cancel paragraph:" + context.getParagraphId()); - PigScriptListener listener = listenerMap.get(context.getParagraphId()); - if (listener != null) { - Set<String> jobIds = listener.getJobIds(); - if (jobIds.isEmpty()) { - LOGGER.info("No job is started, so can not cancel paragraph:" + context.getParagraphId()); - } - for (String jobId : jobIds) { - LOGGER.info("Kill jobId:" + jobId); - HExecutionEngine engine = - (HExecutionEngine) getPigServer().getPigContext().getExecutionEngine(); - try { - Field launcherField = HExecutionEngine.class.getDeclaredField("launcher"); - launcherField.setAccessible(true); - Launcher launcher = (Launcher) launcherField.get(engine); - // It doesn't work for Tez Engine due to PIG-5035 - launcher.killJob(jobId, new Configuration()); - } catch (NoSuchFieldException | BackendException | IllegalAccessException e) { - LOGGER.error("Fail to cancel paragraph:" + context.getParagraphId(), e); - } - } - } else { - LOGGER.warn("No PigScriptListener found, can not cancel paragraph:" - + context.getParagraphId()); - } - } - - @Override - public FormType getFormType() { - return FormType.SIMPLE; - } - - @Override - public int getProgress(InterpreterContext context) { - PigScriptListener listener = listenerMap.get(context.getParagraphId()); - if (listener != null) { - return listener.getProgress(); - } - return 0; - } - - @Override - public Scheduler getScheduler() { - return SchedulerFactory.singleton().createOrGetFIFOScheduler( - PigInterpreter.class.getName() + this.hashCode()); - } - - public abstract PigServer getPigServer(); - - /** - * Use paragraph title if it exists, else use the last line of pig script. - * @param cmd - * @param context - * @return - */ - protected String createJobName(String cmd, InterpreterContext context) { - String pTitle = context.getParagraphTitle(); - if (!StringUtils.isBlank(pTitle)) { - return pTitle; - } else { - // use the last non-empty line of pig script as the job name. - String[] lines = cmd.split("\n"); - for (int i = lines.length - 1; i >= 0; --i) { - if (!StringUtils.isBlank(lines[i])) { - return lines[i]; - } - } - // in case all the lines are empty, but usually it is almost impossible - return "empty_job"; - } - } -} diff --git a/pig/src/main/java/org/apache/zeppelin/pig/PigInterpreter.java b/pig/src/main/java/org/apache/zeppelin/pig/PigInterpreter.java deleted file mode 100644 index 31c39e9436..0000000000 --- a/pig/src/main/java/org/apache/zeppelin/pig/PigInterpreter.java +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.zeppelin.pig; - -import org.apache.commons.io.output.ByteArrayOutputStream; -import org.apache.commons.lang3.exception.ExceptionUtils; -import org.apache.pig.PigServer; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.tools.pigscript.parser.ParseException; -import org.apache.pig.tools.pigstats.PigStats; -import org.apache.pig.tools.pigstats.ScriptState; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; -import java.io.PrintStream; -import java.util.Map; -import java.util.Properties; - -import org.apache.zeppelin.interpreter.InterpreterContext; -import org.apache.zeppelin.interpreter.InterpreterResult; -import org.apache.zeppelin.interpreter.InterpreterResult.Code; - -/** - * Pig interpreter for Zeppelin. - */ -public class PigInterpreter extends BasePigInterpreter { - private static final Logger LOGGER = LoggerFactory.getLogger(PigInterpreter.class); - - private PigServer pigServer; - private boolean includeJobStats = false; - - public PigInterpreter(Properties property) { - super(property); - } - - @Override - public void open() { - String execType = getProperty("zeppelin.pig.execType"); - if (execType == null) { - execType = "mapreduce"; - } - String includeJobStats = getProperty("zeppelin.pig.includeJobStats"); - if (includeJobStats != null) { - this.includeJobStats = Boolean.parseBoolean(includeJobStats); - } - try { - pigServer = new PigServer(execType); - for (Map.Entry entry : getProperties().entrySet()) { - if (!entry.getKey().toString().startsWith("zeppelin.")) { - pigServer.getPigContext().getProperties().setProperty(entry.getKey().toString(), - entry.getValue().toString()); - } - } - } catch (IOException e) { - LOGGER.error("Fail to initialize PigServer", e); - throw new RuntimeException("Fail to initialize PigServer", e); - } - } - - @Override - public void close() { - pigServer = null; - } - - - @Override - public InterpreterResult interpret(String cmd, InterpreterContext contextInterpreter) { - // remember the origial stdout, because we will redirect stdout to capture - // the pig dump output. - PrintStream originalStdOut = System.out; - ByteArrayOutputStream bytesOutput = new ByteArrayOutputStream(); - File tmpFile = null; - try { - pigServer.setJobName(createJobName(cmd, contextInterpreter)); - tmpFile = PigUtils.createTempPigScript(cmd); - System.setOut(new PrintStream(bytesOutput)); - // each thread should its own ScriptState & PigStats - ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState()); - // reset PigStats, otherwise you may get the PigStats of last job in the same thread - // because PigStats is ThreadLocal variable - PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats()); - PigScriptListener scriptListener = new PigScriptListener(); - ScriptState.get().registerListener(scriptListener); - listenerMap.put(contextInterpreter.getParagraphId(), scriptListener); - pigServer.registerScript(tmpFile.getAbsolutePath()); - } catch (IOException e) { - // 1. catch FrontendException, FrontendException happens in the query compilation phase. - // 2. catch ParseException for syntax error - // 3. PigStats, This is execution error - // 4. Other errors. - if (e instanceof FrontendException) { - FrontendException fe = (FrontendException) e; - if (!fe.getMessage().contains("Backend error :")) { - // If the error message contains "Backend error :", that means the exception is from - // backend. - LOGGER.error("Fail to run pig script.", e); - return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e)); - } - } - if (e.getCause() instanceof ParseException) { - return new InterpreterResult(Code.ERROR, e.getCause().getMessage()); - } - PigStats stats = PigStats.get(); - if (stats != null) { - String errorMsg = stats.getDisplayString(); - if (errorMsg != null) { - LOGGER.error("Fail to run pig script, " + errorMsg); - return new InterpreterResult(Code.ERROR, errorMsg); - } - } - LOGGER.error("Fail to run pig script.", e); - return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e)); - } finally { - System.setOut(originalStdOut); - listenerMap.remove(contextInterpreter.getParagraphId()); - if (tmpFile != null) { - tmpFile.delete(); - } - } - StringBuilder outputBuilder = new StringBuilder(); - PigStats stats = PigStats.get(); - if (stats != null && includeJobStats) { - String jobStats = stats.getDisplayString(); - if (jobStats != null) { - outputBuilder.append(jobStats); - } - } - outputBuilder.append(bytesOutput.toString()); - return new InterpreterResult(Code.SUCCESS, outputBuilder.toString()); - } - - - public PigServer getPigServer() { - return pigServer; - } - -} - diff --git a/pig/src/main/java/org/apache/zeppelin/pig/PigQueryInterpreter.java b/pig/src/main/java/org/apache/zeppelin/pig/PigQueryInterpreter.java deleted file mode 100644 index 84401ffe0e..0000000000 --- a/pig/src/main/java/org/apache/zeppelin/pig/PigQueryInterpreter.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.zeppelin.pig; - -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.exception.ExceptionUtils; -import org.apache.pig.PigServer; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.pig.tools.pigscript.parser.ParseException; -import org.apache.pig.tools.pigstats.PigStats; -import org.apache.pig.tools.pigstats.ScriptState; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Properties; - -import org.apache.zeppelin.interpreter.InterpreterContext; -import org.apache.zeppelin.interpreter.InterpreterException; -import org.apache.zeppelin.interpreter.InterpreterResult; -import org.apache.zeppelin.interpreter.InterpreterResult.Code; -import org.apache.zeppelin.interpreter.ResultMessages; - -/** - * - */ -public class PigQueryInterpreter extends BasePigInterpreter { - private static final Logger LOGGER = LoggerFactory.getLogger(PigQueryInterpreter.class); - private static final String MAX_RESULTS = "zeppelin.pig.maxResult"; - private PigServer pigServer; - private int maxResult; - - public PigQueryInterpreter(Properties properties) { - super(properties); - } - - @Override - public void open() throws InterpreterException { - pigServer = getInterpreterInTheSameSessionByClassName(PigInterpreter.class).getPigServer(); - maxResult = Integer.parseInt(getProperty(MAX_RESULTS)); - } - - @Override - public void close() { - - } - - @Override - public InterpreterResult interpret(String st, InterpreterContext context) { - // '-' is invalid for pig alias - String alias = "paragraph_" + context.getParagraphId().replace("-", "_"); - String[] lines = st.split("\n"); - List<String> queries = new ArrayList<>(); - for (int i = 0; i < lines.length; ++i) { - if (i == lines.length - 1) { - lines[i] = alias + " = " + lines[i]; - } - queries.add(lines[i]); - } - - StringBuilder resultBuilder = new StringBuilder("%table "); - try { - pigServer.setJobName(createJobName(st, context)); - File tmpScriptFile = PigUtils.createTempPigScript(queries); - // each thread should its own ScriptState & PigStats - ScriptState.start(pigServer.getPigContext().getExecutionEngine().instantiateScriptState()); - // reset PigStats, otherwise you may get the PigStats of last job in the same thread - // because PigStats is ThreadLocal variable - PigStats.start(pigServer.getPigContext().getExecutionEngine().instantiatePigStats()); - PigScriptListener scriptListener = new PigScriptListener(); - ScriptState.get().registerListener(scriptListener); - listenerMap.put(context.getParagraphId(), scriptListener); - pigServer.registerScript(tmpScriptFile.getAbsolutePath()); - Schema schema = pigServer.dumpSchema(alias); - boolean schemaKnown = (schema != null); - if (schemaKnown) { - for (int i = 0; i < schema.size(); ++i) { - Schema.FieldSchema field = schema.getField(i); - resultBuilder.append(field.alias != null ? field.alias : "col_" + i); - if (i != schema.size() - 1) { - resultBuilder.append("\t"); - } - } - resultBuilder.append("\n"); - } - Iterator<Tuple> iter = pigServer.openIterator(alias); - boolean firstRow = true; - int index = 0; - while (iter.hasNext() && index < maxResult) { - index++; - Tuple tuple = iter.next(); - if (firstRow && !schemaKnown) { - for (int i = 0; i < tuple.size(); ++i) { - resultBuilder.append("c_" + i + "\t"); - } - resultBuilder.append("\n"); - firstRow = false; - } - resultBuilder.append(StringUtils.join(tuple.iterator(), "\t")); - resultBuilder.append("\n"); - } - if (index >= maxResult && iter.hasNext()) { - resultBuilder.append("\n"); - resultBuilder.append(ResultMessages.getExceedsLimitRowsMessage(maxResult, MAX_RESULTS)); - } - } catch (IOException e) { - // Extract error in the following order - // 1. catch FrontendException, FrontendException happens in the query compilation phase. - // 2. catch ParseException for syntax error - // 3. PigStats, This is execution error - // 4. Other errors. - if (e instanceof FrontendException) { - FrontendException fe = (FrontendException) e; - if (!fe.getMessage().contains("Backend error :")) { - LOGGER.error("Fail to run pig query.", e); - return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e)); - } - } - if (e.getCause() instanceof ParseException) { - return new InterpreterResult(Code.ERROR, e.getMessage()); - } - PigStats stats = PigStats.get(); - if (stats != null) { - String errorMsg = stats.getDisplayString(); - if (errorMsg != null) { - return new InterpreterResult(Code.ERROR, errorMsg); - } - } - LOGGER.error("Fail to run pig query.", e); - return new InterpreterResult(Code.ERROR, ExceptionUtils.getStackTrace(e)); - } finally { - listenerMap.remove(context.getParagraphId()); - } - return new InterpreterResult(Code.SUCCESS, resultBuilder.toString()); - } - - @Override - public PigServer getPigServer() { - return this.pigServer; - } -} diff --git a/pig/src/main/java/org/apache/zeppelin/pig/PigScriptListener.java b/pig/src/main/java/org/apache/zeppelin/pig/PigScriptListener.java deleted file mode 100644 index 8ff1bf8986..0000000000 --- a/pig/src/main/java/org/apache/zeppelin/pig/PigScriptListener.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.zeppelin.pig; - -import org.apache.pig.impl.plan.OperatorPlan; -import org.apache.pig.tools.pigstats.JobStats; -import org.apache.pig.tools.pigstats.OutputStats; -import org.apache.pig.tools.pigstats.PigProgressNotificationListener; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.HashSet; -import java.util.Set; - -/** - * - */ -public class PigScriptListener implements PigProgressNotificationListener { - private static final Logger LOGGER = LoggerFactory.getLogger(PigScriptListener.class); - - private Set<String> jobIds = new HashSet(); - private int progress; - - @Override - public void initialPlanNotification(String scriptId, OperatorPlan<?> plan) { - - } - - @Override - public void launchStartedNotification(String scriptId, int numJobsToLaunch) { - - } - - @Override - public void jobsSubmittedNotification(String scriptId, int numJobsSubmitted) { - - } - - @Override - public void jobStartedNotification(String scriptId, String assignedJobId) { - this.jobIds.add(assignedJobId); - } - - @Override - public void jobFinishedNotification(String scriptId, JobStats jobStats) { - - } - - @Override - public void jobFailedNotification(String scriptId, JobStats jobStats) { - - } - - @Override - public void outputCompletedNotification(String scriptId, OutputStats outputStats) { - - } - - @Override - public void progressUpdatedNotification(String scriptId, int progress) { - LOGGER.debug("scriptId:" + scriptId + ", progress:" + progress); - this.progress = progress; - } - - @Override - public void launchCompletedNotification(String scriptId, int numJobsSucceeded) { - - } - - public Set<String> getJobIds() { - return jobIds; - } - - public int getProgress() { - return progress; - } -} diff --git a/pig/src/main/java/org/apache/zeppelin/pig/PigUtils.java b/pig/src/main/java/org/apache/zeppelin/pig/PigUtils.java deleted file mode 100644 index c7690f399f..0000000000 --- a/pig/src/main/java/org/apache/zeppelin/pig/PigUtils.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.zeppelin.pig; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.List; - -/** - * - */ -public class PigUtils { - private static final Logger LOGGER = LoggerFactory.getLogger(PigUtils.class); - - protected static final String DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"; - - public static File createTempPigScript(String content) throws IOException { - File tmpFile = File.createTempFile("zeppelin", "pig"); - LOGGER.debug("Create pig script file:" + tmpFile.getAbsolutePath()); - FileWriter writer = new FileWriter(tmpFile); - IOUtils.write(content, writer); - writer.close(); - return tmpFile.getAbsoluteFile(); - } - - public static File createTempPigScript(List<String> lines) throws IOException { - return createTempPigScript(StringUtils.join(lines, "\n")); - } -} diff --git a/pig/src/main/resources/interpreter-setting.json b/pig/src/main/resources/interpreter-setting.json deleted file mode 100644 index 058e71ba70..0000000000 --- a/pig/src/main/resources/interpreter-setting.json +++ /dev/null @@ -1,59 +0,0 @@ -[ - { - "group": "pig", - "name": "script", - "className": "org.apache.zeppelin.pig.PigInterpreter", - "properties": { - "zeppelin.pig.execType": { - "envName": null, - "propertyName": "zeppelin.pig.execType", - "defaultValue": "mapreduce", - "description": "local | mapreduce | tez_local | tez | spark_local | spark", - "type": "string" - }, - "zeppelin.pig.includeJobStats": { - "envName": null, - "propertyName": "zeppelin.pig.includeJobStats", - "defaultValue": false, - "description": "flag to include job stats in output", - "type": "checkbox" - }, - "SPARK_MASTER": { - "envName": "SPARK_MASTER", - "propertyName": "SPARK_MASTER", - "defaultValue": "local", - "description": "local | yarn-client", - "type": "string" - }, - "SPARK_JAR": { - "envName": "SPARK_JAR", - "propertyName": "SPARK_JAR", - "defaultValue": "", - "description": "spark assembly jar uploaded in hdfs", - "type": "textarea" - } - }, - "editor": { - "language": "pig", - "editOnDblClick": false - } - }, - { - "group": "pig", - "name": "query", - "className": "org.apache.zeppelin.pig.PigQueryInterpreter", - "properties": { - "zeppelin.pig.maxResult": { - "envName": null, - "propertyName": "zeppelin.pig.maxResult", - "defaultValue": "1000", - "description": "max row number for %pig.query", - "type": "number" - } - }, - "editor": { - "language": "pig", - "editOnDblClick": false - } - } -] diff --git a/pig/src/test/java/org/apache/zeppelin/pig/PigInterpreterSparkTest.java b/pig/src/test/java/org/apache/zeppelin/pig/PigInterpreterSparkTest.java deleted file mode 100644 index ea1a3f833a..0000000000 --- a/pig/src/test/java/org/apache/zeppelin/pig/PigInterpreterSparkTest.java +++ /dev/null @@ -1,149 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * <p> - * http://www.apache.org/licenses/LICENSE-2.0 - * <p> - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.zeppelin.pig; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import org.apache.commons.io.IOUtils; -import org.junit.After; -import org.junit.Test; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.Properties; - -import org.apache.zeppelin.interpreter.InterpreterContext; -import org.apache.zeppelin.interpreter.InterpreterResult; - -public class PigInterpreterSparkTest { - private PigInterpreter pigInterpreter; - private InterpreterContext context; - - public void setUpSpark(boolean includeJobStats) { - Properties properties = new Properties(); - properties.put("zeppelin.pig.execType", "spark_local"); - properties.put("zeppelin.pig.includeJobStats", includeJobStats + ""); - pigInterpreter = new PigInterpreter(properties); - pigInterpreter.open(); - context = InterpreterContext.builder().setParagraphId("paragraphId").build(); - - } - @After - public void tearDown() { - pigInterpreter.close(); - } - - @Test - public void testBasics() throws IOException { - setUpSpark(false); - - String content = "1\tandy\n" - + "2\tpeter\n"; - File tmpFile = File.createTempFile("zeppelin", "test"); - FileWriter writer = new FileWriter(tmpFile); - IOUtils.write(content, writer); - writer.close(); - - // simple pig script using dump - String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';" - + "dump a;"; - InterpreterResult result = pigInterpreter.interpret(pigscript, context); - assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - assertTrue(result.message().get(0).getData().contains("(1,andy)\n(2,peter)")); - - // describe - pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id: int, name: bytearray);" - + "describe a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - assertTrue(result.message().get(0).getData().contains("a: {id: int,name: bytearray}")); - - // syntax error (compilation error) - pigscript = "a = loa '" + tmpFile.getAbsolutePath() + "';" - + "describe a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.ERROR, result.code()); - assertTrue(result.message().get(0).getData().contains( - "Syntax error, unexpected symbol at or near 'a'")); - - // syntax error - pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';" - + "foreach a generate $0;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.ERROR, result.code()); - assertTrue(result.message().get(0).getData().contains("expecting one of")); - } - - @Test - public void testIncludeJobStats() throws IOException { - setUpSpark(true); - - String content = "1\tandy\n" - + "2\tpeter\n"; - File tmpFile = File.createTempFile("zeppelin", "test"); - FileWriter writer = new FileWriter(tmpFile); - IOUtils.write(content, writer); - writer.close(); - - // simple pig script using dump - String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';" - + "dump a;"; - InterpreterResult result = pigInterpreter.interpret(pigscript, context); - assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - assertTrue(result.message().get(0).getData().contains("Spark Job")); - assertTrue(result.message().get(0).getData().contains("(1,andy)\n(2,peter)")); - - // describe - pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id: int, name: bytearray);" - + "describe a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - // no job is launched, so no jobStats - assertTrue(result.message().get(0).getData().contains("a: {id: int,name: bytearray}")); - - // syntax error (compilation error) - pigscript = "a = loa '" + tmpFile.getAbsolutePath() + "';" - + "describe a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.ERROR, result.code()); - // no job is launched, so no jobStats - assertTrue(result.message().get(0).getData().contains( - "Syntax error, unexpected symbol at or near 'a'")); - - // execution error - pigscript = "a = load 'invalid_path';" - + "dump a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.ERROR, result.code()); - assertTrue(result.message().get(0).getData().contains("Failed to read data from")); - } - -} - - diff --git a/pig/src/test/java/org/apache/zeppelin/pig/PigInterpreterTest.java b/pig/src/test/java/org/apache/zeppelin/pig/PigInterpreterTest.java deleted file mode 100644 index 5a21bb3be0..0000000000 --- a/pig/src/test/java/org/apache/zeppelin/pig/PigInterpreterTest.java +++ /dev/null @@ -1,154 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * <p> - * http://www.apache.org/licenses/LICENSE-2.0 - * <p> - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.zeppelin.pig; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import org.apache.commons.io.IOUtils; -import org.junit.After; -import org.junit.Test; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.Properties; - -import org.apache.zeppelin.interpreter.InterpreterContext; -import org.apache.zeppelin.interpreter.InterpreterResult; -import org.apache.zeppelin.interpreter.InterpreterResult.Code; -import org.apache.zeppelin.interpreter.InterpreterResult.Type; - -public class PigInterpreterTest { - - private PigInterpreter pigInterpreter; - private InterpreterContext context; - - private void setUpLocal(boolean includeJobStats) { - Properties properties = new Properties(); - properties.put("zeppelin.pig.execType", "local"); - properties.put("zeppelin.pig.includeJobStats", includeJobStats + ""); - pigInterpreter = new PigInterpreter(properties); - pigInterpreter.open(); - context = InterpreterContext.builder().setParagraphId("paragraphId").build(); - } - - @After - public void tearDown() { - pigInterpreter.close(); - } - - @Test - public void testBasics() throws IOException { - setUpLocal(false); - - String content = "1\tandy\n" - + "2\tpeter\n"; - File tmpFile = File.createTempFile("zeppelin", "test"); - FileWriter writer = new FileWriter(tmpFile); - IOUtils.write(content, writer); - writer.close(); - - // simple pig script using dump - String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';" - + "dump a;"; - InterpreterResult result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.SUCCESS, result.code()); - assertTrue(result.message().get(0).getData().contains("(1,andy)\n(2,peter)")); - - // describe - pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id: int, name: bytearray);" - + "describe a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.SUCCESS, result.code()); - assertTrue(result.message().get(0).getData().contains("a: {id: int,name: bytearray}")); - - // syntax error (compilation error) - pigscript = "a = loa '" + tmpFile.getAbsolutePath() + "';" - + "describe a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.ERROR, result.code()); - assertTrue(result.message().get(0).getData().contains( - "Syntax error, unexpected symbol at or near 'a'")); - - // execution error - pigscript = "a = load 'invalid_path';" - + "dump a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.ERROR, result.code()); - assertTrue(result.message().get(0).getData().contains("Input path does not exist")); - } - - - @Test - public void testIncludeJobStats() throws IOException { - setUpLocal(true); - - String content = "1\tandy\n" - + "2\tpeter\n"; - File tmpFile = File.createTempFile("zeppelin", "test"); - FileWriter writer = new FileWriter(tmpFile); - IOUtils.write(content, writer); - writer.close(); - - // simple pig script using dump - String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';" - + "dump a;"; - InterpreterResult result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.SUCCESS, result.code()); - assertTrue(result.message().get(0).getData().contains("Counters:")); - assertTrue(result.message().get(0).getData().contains("(1,andy)\n(2,peter)")); - - // describe - pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id: int, name: bytearray);" - + "describe a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.SUCCESS, result.code()); - // no job is launched, so no jobStats - assertTrue(!result.message().get(0).getData().contains("Counters:")); - assertTrue(result.message().get(0).getData().contains("a: {id: int,name: bytearray}")); - - // syntax error (compilation error) - pigscript = "a = loa '" + tmpFile.getAbsolutePath() + "';" - + "describe a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.ERROR, result.code()); - // no job is launched, so no jobStats - assertTrue(!result.message().get(0).getData().contains("Counters:")); - assertTrue(result.message().get(0).getData().contains( - "Syntax error, unexpected symbol at or near 'a'")); - - // execution error - pigscript = "a = load 'invalid_path';" - + "dump a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.ERROR, result.code()); - assertTrue(result.message().get(0).getData().contains("Counters:")); - assertTrue(result.message().get(0).getData().contains("Input path does not exist")); - } - -} diff --git a/pig/src/test/java/org/apache/zeppelin/pig/PigInterpreterTezTest.java b/pig/src/test/java/org/apache/zeppelin/pig/PigInterpreterTezTest.java deleted file mode 100644 index ec09a883e2..0000000000 --- a/pig/src/test/java/org/apache/zeppelin/pig/PigInterpreterTezTest.java +++ /dev/null @@ -1,157 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * <p> - * http://www.apache.org/licenses/LICENSE-2.0 - * <p> - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.zeppelin.pig; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import org.apache.commons.io.IOUtils; -import org.junit.After; -import org.junit.Test; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.Properties; - -import org.apache.zeppelin.interpreter.InterpreterContext; -import org.apache.zeppelin.interpreter.InterpreterResult; -import org.apache.zeppelin.interpreter.InterpreterResult.Code; -import org.apache.zeppelin.interpreter.InterpreterResult.Type; - -public class PigInterpreterTezTest { - - private PigInterpreter pigInterpreter; - private InterpreterContext context; - - public void setUpTez(boolean includeJobStats) { - Properties properties = new Properties(); - properties.put("zeppelin.pig.execType", "tez_local"); - properties.put("zeppelin.pig.includeJobStats", includeJobStats + ""); - properties.put("tez.queue.name", "test"); - pigInterpreter = new PigInterpreter(properties); - pigInterpreter.open(); - context = InterpreterContext.builder().setParagraphId("paragraphId").build(); - - } - @After - public void tearDown() { - pigInterpreter.close(); - } - - @Test - public void testBasics() throws IOException { - setUpTez(false); - - assertEquals("test", - pigInterpreter.getPigServer().getPigContext().getProperties() - .getProperty("tez.queue.name")); - - String content = "1\tandy\n" - + "2\tpeter\n"; - File tmpFile = File.createTempFile("zeppelin", "test"); - FileWriter writer = new FileWriter(tmpFile); - IOUtils.write(content, writer); - writer.close(); - - // simple pig script using dump - String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';" - + "dump a;"; - InterpreterResult result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.SUCCESS, result.code()); - assertTrue(result.message().get(0).getData().contains("(1,andy)\n(2,peter)")); - - // describe - pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id: int, name: bytearray);" - + "describe a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.SUCCESS, result.code()); - assertTrue(result.message().get(0).getData().contains("a: {id: int,name: bytearray}")); - - // syntax error (compilation error) - pigscript = "a = loa '" + tmpFile.getAbsolutePath() + "';" - + "describe a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.ERROR, result.code()); - assertTrue(result.message().get(0).getData().contains( - "Syntax error, unexpected symbol at or near 'a'")); - - // syntax error - pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';" - + "foreach a generate $0;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.ERROR, result.code()); - assertTrue(result.message().get(0).getData().contains("expecting one of")); - } - - @Test - public void testIncludeJobStats() throws IOException { - setUpTez(true); - - String content = "1\tandy\n" - + "2\tpeter\n"; - File tmpFile = File.createTempFile("zeppelin", "test"); - FileWriter writer = new FileWriter(tmpFile); - IOUtils.write(content, writer); - writer.close(); - - // simple pig script using dump - String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "';" - + "dump a;"; - InterpreterResult result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.SUCCESS, result.code()); - assertTrue(result.message().get(0).getData().contains("Vertex Stats")); - assertTrue(result.message().get(0).getData().contains("(1,andy)\n(2,peter)")); - - // describe - pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id: int, name: bytearray);" - + "describe a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.SUCCESS, result.code()); - // no job is launched, so no jobStats - assertTrue(!result.message().get(0).getData().contains("Vertex Stats")); - assertTrue(result.message().get(0).getData().contains("a: {id: int,name: bytearray}")); - - // syntax error (compilation error) - pigscript = "a = loa '" + tmpFile.getAbsolutePath() + "';" - + "describe a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.ERROR, result.code()); - // no job is launched, so no jobStats - assertTrue(!result.message().get(0).getData().contains("Vertex Stats")); - assertTrue(result.message().get(0).getData().contains( - "Syntax error, unexpected symbol at or near 'a'")); - - // execution error - pigscript = "a = load 'invalid_path';" - + "dump a;"; - result = pigInterpreter.interpret(pigscript, context); - assertEquals(Type.TEXT, result.message().get(0).getType()); - assertEquals(Code.ERROR, result.code()); - assertTrue(!result.message().get(0).getData().contains("Vertex Stats")); - assertTrue(result.message().get(0).getData().contains("Input path does not exist")); - } -} diff --git a/pig/src/test/java/org/apache/zeppelin/pig/PigQueryInterpreterTest.java b/pig/src/test/java/org/apache/zeppelin/pig/PigQueryInterpreterTest.java deleted file mode 100644 index 01f0a20ced..0000000000 --- a/pig/src/test/java/org/apache/zeppelin/pig/PigQueryInterpreterTest.java +++ /dev/null @@ -1,162 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * <p> - * http://www.apache.org/licenses/LICENSE-2.0 - * <p> - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.zeppelin.pig; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import org.apache.commons.io.IOUtils; -import org.apache.zeppelin.interpreter.LazyOpenInterpreter; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.apache.zeppelin.interpreter.Interpreter; -import org.apache.zeppelin.interpreter.InterpreterContext; -import org.apache.zeppelin.interpreter.InterpreterException; -import org.apache.zeppelin.interpreter.InterpreterGroup; -import org.apache.zeppelin.interpreter.InterpreterResult; - -/** - * - */ -public class PigQueryInterpreterTest { - - private Interpreter pigInterpreter; - private Interpreter pigQueryInterpreter; - private InterpreterContext context; - - @Before - public void setUp() throws InterpreterException { - Properties properties = new Properties(); - properties.put("zeppelin.pig.execType", "local"); - properties.put("zeppelin.pig.maxResult", "20"); - - pigInterpreter = new LazyOpenInterpreter(new PigInterpreter(properties)); - pigQueryInterpreter = new LazyOpenInterpreter(new PigQueryInterpreter(properties)); - List<Interpreter> interpreters = new ArrayList(); - interpreters.add(pigInterpreter); - interpreters.add(pigQueryInterpreter); - InterpreterGroup group = new InterpreterGroup(); - group.put("note_id", interpreters); - pigInterpreter.setInterpreterGroup(group); - pigQueryInterpreter.setInterpreterGroup(group); - pigInterpreter.open(); - pigQueryInterpreter.open(); - - context = InterpreterContext.builder().setParagraphId("paragraphId").build(); - } - - @After - public void tearDown() throws InterpreterException { - pigInterpreter.close(); - pigQueryInterpreter.close(); - } - - @Test - public void testBasics() throws IOException, InterpreterException { - String content = "andy\tmale\t10\n" - + "peter\tmale\t20\n" - + "amy\tfemale\t14\n"; - File tmpFile = File.createTempFile("zeppelin", "test"); - FileWriter writer = new FileWriter(tmpFile); - IOUtils.write(content, writer); - writer.close(); - - // run script in PigInterpreter - String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (name, gender, age);\n" - + "a2 = load 'invalid_path' as (name, gender, age);\n" - + "dump a;"; - InterpreterResult result = pigInterpreter.interpret(pigscript, context); - assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - assertTrue(result.message().get(0).getData().contains( - "(andy,male,10)\n(peter,male,20)\n(amy,female,14)")); - - // run single line query in PigQueryInterpreter - String query = "foreach a generate name, age;"; - result = pigQueryInterpreter.interpret(query, context); - assertEquals(InterpreterResult.Type.TABLE, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - assertEquals("name\tage\nandy\t10\npeter\t20\namy\t14\n", result.message().get(0).getData()); - - // run multiple line query in PigQueryInterpreter - query = "b = group a by gender;\nforeach b generate group as gender, COUNT($1) as count;"; - result = pigQueryInterpreter.interpret(query, context); - assertEquals(InterpreterResult.Type.TABLE, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - assertEquals("gender\tcount\nmale\t2\nfemale\t1\n", result.message().get(0).getData()); - - // generate alias with unknown schema - query = "b = group a by gender;\nforeach b generate group, COUNT($1);"; - result = pigQueryInterpreter.interpret(query, context); - assertEquals(InterpreterResult.Type.TABLE, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - assertEquals("group\tcol_1\nmale\t2\nfemale\t1\n", result.message().get(0).getData()); - - // syntax error in PigQueryInterpereter - query = "b = group a by invalid_column;\nforeach b generate group as gender, " + - "COUNT($1) as count;"; - result = pigQueryInterpreter.interpret(query, context); - assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.ERROR, result.code()); - assertTrue(result.message().get(0).getData().contains( - "Projected field [invalid_column] does not exist in schema")); - - // execution error in PigQueryInterpreter - query = "foreach a2 generate name, age;"; - result = pigQueryInterpreter.interpret(query, context); - assertEquals(InterpreterResult.Type.TEXT, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.ERROR, result.code()); - assertTrue(result.message().get(0).getData().contains("Input path does not exist")); - } - - @Test - public void testMaxResult() throws IOException, InterpreterException { - StringBuilder content = new StringBuilder(); - for (int i = 0; i < 30; ++i) { - content.append(i + "\tname_" + i + "\n"); - } - File tmpFile = File.createTempFile("zeppelin", "test"); - FileWriter writer = new FileWriter(tmpFile); - IOUtils.write(content, writer); - writer.close(); - - // run script in PigInterpreter - String pigscript = "a = load '" + tmpFile.getAbsolutePath() + "' as (id, name);"; - InterpreterResult result = pigInterpreter.interpret(pigscript, context); - assertEquals(0, result.message().size()); - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - - // run single line query in PigQueryInterpreter - String query = "foreach a generate id;"; - result = pigQueryInterpreter.interpret(query, context); - assertEquals(InterpreterResult.Type.TABLE, result.message().get(0).getType()); - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - assertTrue(result.message().get(0).getData().contains("id\n0\n1\n2")); - assertTrue(result.message().get(1).getData().contains("alert-warning")); - } -} diff --git a/pig/src/test/resources/core-site.xml b/pig/src/test/resources/core-site.xml deleted file mode 100644 index f1fe563815..0000000000 --- a/pig/src/test/resources/core-site.xml +++ /dev/null @@ -1,3 +0,0 @@ -<configuration> - -</configuration> \ No newline at end of file diff --git a/pig/src/test/resources/log4j.properties b/pig/src/test/resources/log4j.properties deleted file mode 100644 index 8daee59d60..0000000000 --- a/pig/src/test/resources/log4j.properties +++ /dev/null @@ -1,22 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -log4j.rootLogger = INFO, stdout - -log4j.appender.stdout = org.apache.log4j.ConsoleAppender -log4j.appender.stdout.layout = org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%5p [%d] ({%t} %F[%M]:%L) - %m%n diff --git a/pom.xml b/pom.xml index 12c6059c48..879e61f726 100644 --- a/pom.xml +++ b/pom.xml @@ -71,7 +71,6 @@ <module>shell</module> <module>livy</module> <module>hbase</module> - <module>pig</module> <module>jdbc</module> <module>file</module> <module>flink</module> diff --git a/zeppelin-distribution/src/bin_license/LICENSE b/zeppelin-distribution/src/bin_license/LICENSE index 2692811edd..1c4081f412 100644 --- a/zeppelin-distribution/src/bin_license/LICENSE +++ b/zeppelin-distribution/src/bin_license/LICENSE @@ -165,7 +165,6 @@ The following components are provided under Apache License. (Apache 2.0) Tachyon Project Core (org.tachyonproject:tachyon:0.6.4 - http://tachyonproject.org/tachyon/) (Apache 2.0) Tachyon Project Client (org.tachyonproject:tachyon-client:0.6.4 - http://tachyonproject.org/tachyon-client/) (Apache 2.0) javax.inject (javax.inject:javax.inject:1 - http://code.google.com/p/atinject/) - (Apache 2.0) Apache Pig (org.apache.pig:0.16 - http://pig.apache.org) (Apache 2.0) tez-api (org.apache.tez:tez-api:0.7.0 - http://tez.apache.org) (Apache 2.0) tez-common (org.apache.tez:tez-common:0.7.0 - http://tez.apache.org) (Apache 2.0) tez-dag (org.apache.tez:tez-dag:0.7.0 - http://tez.apache.org) diff --git a/zeppelin-web/karma.conf.js b/zeppelin-web/karma.conf.js index 018ccb5b11..9e16589fba 100644 --- a/zeppelin-web/karma.conf.js +++ b/zeppelin-web/karma.conf.js @@ -67,6 +67,7 @@ module.exports = function(config) { 'bower_components/ace-builds/src-noconflict/keybinding-emacs.js', 'bower_components/ace-builds/src-noconflict/ext-language_tools.js', 'bower_components/ace-builds/src-noconflict/theme-chrome.js', + 'bower_components/ace-builds/src-noconflict/mode-javascript.js', 'bower_components/angular-ui-ace/ui-ace.js', 'bower_components/jquery.scrollTo/jquery.scrollTo.js', 'bower_components/d3/d3.js',