# IGNITE-463: Done.
Project: http://git-wip-us.apache.org/repos/asf/incubator-ignite/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-ignite/commit/a9fc9b6b Tree: http://git-wip-us.apache.org/repos/asf/incubator-ignite/tree/a9fc9b6b Diff: http://git-wip-us.apache.org/repos/asf/incubator-ignite/diff/a9fc9b6b Branch: refs/heads/ignite-443 Commit: a9fc9b6b38d69e1856279fd295ef2a2765d651ac Parents: cad3d1f Author: vozerov-gridgain <voze...@gridgain.com> Authored: Thu Mar 12 13:38:40 2015 +0300 Committer: vozerov-gridgain <voze...@gridgain.com> Committed: Thu Mar 12 13:38:40 2015 +0300 ---------------------------------------------------------------------- assembly/release-hadoop.xml | 16 +-- docs/core-site.ignite.xml | 90 ------------- docs/hadoop_readme.md | 134 ------------------ docs/hadoop_readme.pdf | Bin 82297 -> 0 bytes docs/hive-site.ignite.xml | 37 ----- docs/mapred-site.ignite.xml | 66 --------- modules/hadoop/config/core-site.ignite.xml | 90 +++++++++++++ modules/hadoop/config/hive-site.ignite.xml | 37 +++++ modules/hadoop/config/mapred-site.ignite.xml | 66 +++++++++ modules/hadoop/docs/hadoop_readme.md | 135 +++++++++++++++++++ modules/hadoop/docs/hadoop_readme.pdf | Bin 0 -> 82297 bytes .../internal/processors/hadoop/HadoopSetup.java | 33 ++++- .../hadoop/HadoopCommandLineTest.java | 16 ++- 13 files changed, 377 insertions(+), 343 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/assembly/release-hadoop.xml ---------------------------------------------------------------------- diff --git a/assembly/release-hadoop.xml b/assembly/release-hadoop.xml index c53df09..640ca35 100644 --- a/assembly/release-hadoop.xml +++ b/assembly/release-hadoop.xml @@ -35,20 +35,20 @@ <files> <file> - <source>docs/hadoop_readme.pdf</source> - <outputDirectory>/docs</outputDirectory> + <source>modules/hadoop/docs/hadoop_readme.pdf</source> + <outputDirectory>.</outputDirectory> </file> 
<file> - <source>docs/core-site.ignite.xml</source> - <outputDirectory>/docs</outputDirectory> + <source>modules/hadoop/config/core-site.ignite.xml</source> + <outputDirectory>/config/hadoop</outputDirectory> </file> <file> - <source>docs/mapred-site.ignite.xml</source> - <outputDirectory>/docs</outputDirectory> + <source>modules/hadoop/config/mapred-site.ignite.xml</source> + <outputDirectory>/config/hadoop</outputDirectory> </file> <file> - <source>docs/hive-site.ignite.xml</source> - <outputDirectory>/docs</outputDirectory> + <source>modules/hadoop/config/hive-site.ignite.xml</source> + <outputDirectory>/config/hadoop</outputDirectory> </file> </files> http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/docs/core-site.ignite.xml ---------------------------------------------------------------------- diff --git a/docs/core-site.ignite.xml b/docs/core-site.ignite.xml deleted file mode 100644 index 8b8e634..0000000 --- a/docs/core-site.ignite.xml +++ /dev/null @@ -1,90 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---> - -<!-- - This template file contains settings needed to run Apache Hadoop jobs - with Apache Ignite's distributed in-memory file system IGFS. - - You can replace '$HADOOP_HOME/etc/hadoop/core-site.xml' file with this one - to work with IGFS nodes running on localhost (these local nodes can be - a part of distributed cluster though). To work with file system on remote - hosts you need to change the host of file system URI to any host running - IGFS node. - - Note that Ignite jars must be in Apache Hadoop client classpath to work - with this configuration. - - Run script '$IGNITE_HOME/bin/setup-hadoop.{sh|bat}' for Apache Hadoop client setup. ---> - -<configuration> - <!-- - Set default file system to IGFS instance named "igfs" configured in Ignite. - --> - <property> - <name>fs.default.name</name> - <value>igfs://igfs@localhost</value> - </property> - - <!-- - Set Hadoop 1.* file system implementation class for IGFS. - --> - <property> - <name>fs.igfs.impl</name> - <value>org.apache.ignite.hadoop.fs.v1.IgniteHadoopFileSystem</value> - </property> - - <!-- - Set Hadoop 2.* file system implementation class for IGFS. - --> - <property> - <name>fs.AbstractFileSystem.igfs.impl</name> - <value>org.apache.ignite.hadoop.fs.v2.IgniteHadoopFileSystem</value> - </property> - - <!-- - Disallow data node replacement since it does not make sense for IGFS nodes. - --> - <property> - <name>dfs.client.block.write.replace-datanode-on-failure.policy</name> - <value>NEVER</value> - </property> - - <!-- - Allow to write the job statistics into IGFS. - --> - <!-- - <property> - <name>ignite.counters.writer</name> - <value>org.apache.ignite.hadoop.fs.IgniteHadoopFileSystemCounterWriter</value> - </property> - --> - - <!-- - By default data is placed into the file /user/<user_name>/<job_id>/performance - You can override this path with using macro ${USER} that is to injection of submitter user name. 
- --> - <!-- - <property> - <name>ignite.counters.fswriter.directory</name> - <value>/user/${USER}</value> - </property> - --> -</configuration> http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/docs/hadoop_readme.md ---------------------------------------------------------------------- diff --git a/docs/hadoop_readme.md b/docs/hadoop_readme.md deleted file mode 100644 index 01b49d0..0000000 --- a/docs/hadoop_readme.md +++ /dev/null @@ -1,134 +0,0 @@ -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<center> - -</center> - -## 1. Apache Ignite In-Memory Accelerator For Apache Hadoop - -Ignite In-Memory Accelerator For Apache Hadoop is designed to deliver uncompromised performance for existing Apache -Hadoop 2.2 or above applications with zero code change as well as simplicity of installation and configuration across all the -supported platforms. - -## 2. Installation - -Ignite distribution comes in a ZIP file that simply needs to be unzipped. The Accelerator requires Apache Hadoop of -version 2.2 or above to be already installed on the system either using Apache Bigtop packages or manually (manual installation -just means that Apache Hadoop binary distribution must be unpacked somewhere on the system). 
In case of manual -installation `HADOOP_HOME` environment variable must point to the installation directory of Apache Hadoop. - -> **NOTE:** You do not need any Apache Hadoop processes to be started, you only need to deploy the Apache Hadoop -> distribution on your system. Nevertheless you can run Apache Hadoop jobs with Ignite Accelerator over HDFS, -> in this case up and running HDFS infrastructure will be needed. - -The Accelerator comes with command line setup tool `bin/setup-hadoop.sh` (`bin/setup-hadoop.bat` on Windows) which -will guide you through all the needed setup steps (note that the setup tool will require write permissions to the -Apache Hadoop installation directory). - -Installation requirements: - -1. Windows, Linux, or MacOS environment. -2. Java 7 or 8 (latest update is advisable). -3. Point `JAVA_HOME` environment variable to your JDK or JRE installation. -4. Apache Hadoop 2.2 or above installed. -5. Point `HADOOP_HOME` environment variable to the installation directory of Apache Hadoop. -6. Run `bin/setup-hadoop.{sh|bat}` setup script and follow instructions. - -> **NOTE:** On Windows platform Apache Hadoop client requires `JAVA_HOME` path to not contain space characters. -> Java installed to `C:\\Program Files\` will not work, install JRE to correct location and point `JAVA_HOME` there. - -### 2.1 Check Apache Ignite Installation - -After setup script successfully completed, you can execute the Ignite startup script. -The following command will startup Ignite node with default configuration using multicast node discovery. - - bin/ignite.{sh|bat} - -If Ignite was installed successfully, the output from above commands should produce no exceptions or errors. -Note that you may see some other warnings during startup, but this is OK as they are meant to inform that certain -functionality is turned on or off by default. - -You can execute the above commands multiple times on the same machine and make sure that nodes discover each other. 
-Here is an example of log printout when 2 nodes join topology: - - ... Topology snapshot [nodes=2, CPUs=8, hash=0xD551B245] - -You can also start Ignite Management Console, called Visor, and observe started nodes. To startup Visor, you should execute the following script: - - /bin/ignitevisorcmd.{sh|bat} - -## 3. Configuration - -To configure Ignite nodes you can change configuration files at `config` directory of Ignite installation. Those are conventional Spring files. Please refer to shipped configuration files and Ignite javadocs for more details. - -### 3.1 Distributed File System Configuration - -Ignite has it's own distributed in-memory file system called IgniteFS. Hadoop jobs can use it instead of HDFS to achieve maximum performance and scalability. Setting up IGFS is much simpler than HDFS, it requires just few tweaks of Ignite node configuration and does not require starting any additional processes. Default configuration shipped with the Accelerator contains one configured instance named "ignitefs" which can be used as reference. - -Generally URI for IgniteFS which will be used by Apache Hadoop looks like: - - igfs://igfs_name@host_name - -Where `igfs_name` is IgniteFS instance name, `host_name` is any host running Ignite node with that IgniteFS instance configured. -For more details please refer to IgniteFS documentation. - -### 3.2 Apache Hadoop Client Configuration - -To run Apache Hadoop jobs with Ignite cluster you need to configure `core-site.xml` and `mapred-site.xml` at -`$HADOOP_HOME/etc/hadoop` directory the same way as it is done in templates shipped with the Accelerator. -The setup tool `bin/setup-hadoop.{sh|bat}` will ask you to replace those files with Ignite templates or -you can find these templates at `docs/core-site.ignite.xml` and `docs/mapred-site.ignite.xml` respectively and perform the needed configuration manually. - -Apache Hadoop client will need to have Ignite jar files in classpath, the setup tool will care of that as well. 
- -## 4. Running Apache Hadoop Job With Ignite In-Memory Accelerator - -To run Apache Hadoop job with Ignite cluster you have to start one or multiple Ignite nodes and make sure they successfully discovered each other. - -When all the configuration is complete and Ignite nodes are started, running Apache Hadoop job will be the same as with conventional Apache Hadoop distribution except that all Ignite nodes are equal and any of them can be treated as Job Tracker and DFS Name Node. - -To run "Word Count" example you can load some text files to IGFS using standard Apache Hadoop tools: - - cd $HADOOP_HOME/bin - - ./hadoop fs -mkdir /input - - ./hadoop fs -copyFromLocal $HADOOP_HOME/README.txt /input/WORD_COUNT_ME.txt - -Run the job: - - ./hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/*-mapreduce-examples-*.jar wordcount /input /output - -Check results: - - ./hadoop fs -ls /output - - ./hadoop fs -cat /output/part-r-00000 - -A job can be ran on multiple nodes on localhost or in cluster environment the same way. The only changes needed to -switch Apache Hadoop client to a cluster are to fix host in default DFS URI in `core-site.xml` and host in job tracker -address in `mapred-site.xml`. - -## 5. Management & Monitoring with Visor -Ignite comes with CLI (command) based DevOps Managements Console, called Visor, delivering advance set of management and monitoring capabilities. - -To start Visor in console mode you should execute the following command: - - `bin/ignitevisorcmd.sh` - -On Windows, run the same commands with `.bat` extension. 
\ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/docs/hadoop_readme.pdf ---------------------------------------------------------------------- diff --git a/docs/hadoop_readme.pdf b/docs/hadoop_readme.pdf deleted file mode 100644 index d56d950..0000000 Binary files a/docs/hadoop_readme.pdf and /dev/null differ http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/docs/hive-site.ignite.xml ---------------------------------------------------------------------- diff --git a/docs/hive-site.ignite.xml b/docs/hive-site.ignite.xml deleted file mode 100644 index f278aab..0000000 --- a/docs/hive-site.ignite.xml +++ /dev/null @@ -1,37 +0,0 @@ -<?xml version="1.0"?> -<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<!-- - This template file contains settings needed to run Apache Hive queries - with Ignite In-Memory Accelerator. - - You can replace '$HIVE_HOME/conf/hive-site.xml' file with this one or - run script '$IGNITE_HOME/bin/setup-hadoop.{sh|bat}' for Apache Hadoop - and Hive client setup. ---> -<configuration> - <!-- - Ignite requires query plan to be passed not using local resource. 
- --> - <property> - <name>hive.rpc.query.plan</name> - <value>true</value> - </property> -</configuration> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/docs/mapred-site.ignite.xml ---------------------------------------------------------------------- diff --git a/docs/mapred-site.ignite.xml b/docs/mapred-site.ignite.xml deleted file mode 100644 index a2ed437..0000000 --- a/docs/mapred-site.ignite.xml +++ /dev/null @@ -1,66 +0,0 @@ -<?xml version="1.0"?> -<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> - -<!-- - This template file contains settings needed to run Apache Hadoop jobs - with Apache Ignite In-Memory Accelerator. - - You can replace '$HADOOP_HOME/etc/hadoop/mapred-site.xml' file with this one - to run jobs on localhost (local node can be a part of distributed cluster though). - To run jobs on remote host you have to change jobtracker address to the REST address - of any running Ignite node. - - Note that Ignite jars must be in Apache Hadoop client classpath to work - with this configuration. - - Run script '$IGNITE_HOME/bin/setup-hadoop.{sh|bat}' for Apache Hadoop client setup. 
---> - -<configuration> - <!-- - Framework name must be set to 'ignite'. - --> - <property> - <name>mapreduce.framework.name</name> - <value>ignite</value> - </property> - - <!-- - Job tracker address must be set to the REST address of any running Ignite node. - --> - <property> - <name>mapreduce.jobtracker.address</name> - <value>localhost:11211</value> - </property> - - <!-- Parameters for job tuning. --> - <!-- - <property> - <name>mapreduce.job.reduces</name> - <value>1</value> - </property> - - <property> - <name>mapreduce.job.maps</name> - <value>4</value> - </property> - --> - -</configuration> http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/modules/hadoop/config/core-site.ignite.xml ---------------------------------------------------------------------- diff --git a/modules/hadoop/config/core-site.ignite.xml b/modules/hadoop/config/core-site.ignite.xml new file mode 100644 index 0000000..8b8e634 --- /dev/null +++ b/modules/hadoop/config/core-site.ignite.xml @@ -0,0 +1,90 @@ +<?xml version="1.0" encoding="UTF-8"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> + +<!-- + This template file contains settings needed to run Apache Hadoop jobs + with Apache Ignite's distributed in-memory file system IGFS. + + You can replace '$HADOOP_HOME/etc/hadoop/core-site.xml' file with this one + to work with IGFS nodes running on localhost (these local nodes can be + a part of distributed cluster though). To work with file system on remote + hosts you need to change the host of file system URI to any host running + IGFS node. + + Note that Ignite jars must be in Apache Hadoop client classpath to work + with this configuration. + + Run script '$IGNITE_HOME/bin/setup-hadoop.{sh|bat}' for Apache Hadoop client setup. +--> + +<configuration> + <!-- + Set default file system to IGFS instance named "igfs" configured in Ignite. + --> + <property> + <name>fs.default.name</name> + <value>igfs://igfs@localhost</value> + </property> + + <!-- + Set Hadoop 1.* file system implementation class for IGFS. + --> + <property> + <name>fs.igfs.impl</name> + <value>org.apache.ignite.hadoop.fs.v1.IgniteHadoopFileSystem</value> + </property> + + <!-- + Set Hadoop 2.* file system implementation class for IGFS. + --> + <property> + <name>fs.AbstractFileSystem.igfs.impl</name> + <value>org.apache.ignite.hadoop.fs.v2.IgniteHadoopFileSystem</value> + </property> + + <!-- + Disallow data node replacement since it does not make sense for IGFS nodes. + --> + <property> + <name>dfs.client.block.write.replace-datanode-on-failure.policy</name> + <value>NEVER</value> + </property> + + <!-- + Allow to write the job statistics into IGFS. + --> + <!-- + <property> + <name>ignite.counters.writer</name> + <value>org.apache.ignite.hadoop.fs.IgniteHadoopFileSystemCounterWriter</value> + </property> + --> + + <!-- + By default data is placed into the file /user/<user_name>/<job_id>/performance + You can override this path with using macro ${USER} that is to injection of submitter user name. 
+ --> + <!-- + <property> + <name>ignite.counters.fswriter.directory</name> + <value>/user/${USER}</value> + </property> + --> +</configuration> http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/modules/hadoop/config/hive-site.ignite.xml ---------------------------------------------------------------------- diff --git a/modules/hadoop/config/hive-site.ignite.xml b/modules/hadoop/config/hive-site.ignite.xml new file mode 100644 index 0000000..f278aab --- /dev/null +++ b/modules/hadoop/config/hive-site.ignite.xml @@ -0,0 +1,37 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<!-- + This template file contains settings needed to run Apache Hive queries + with Ignite In-Memory Accelerator. + + You can replace '$HIVE_HOME/conf/hive-site.xml' file with this one or + run script '$IGNITE_HOME/bin/setup-hadoop.{sh|bat}' for Apache Hadoop + and Hive client setup. +--> +<configuration> + <!-- + Ignite requires query plan to be passed not using local resource. 
+ --> + <property> + <name>hive.rpc.query.plan</name> + <value>true</value> + </property> +</configuration> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/modules/hadoop/config/mapred-site.ignite.xml ---------------------------------------------------------------------- diff --git a/modules/hadoop/config/mapred-site.ignite.xml b/modules/hadoop/config/mapred-site.ignite.xml new file mode 100644 index 0000000..a2ed437 --- /dev/null +++ b/modules/hadoop/config/mapred-site.ignite.xml @@ -0,0 +1,66 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<!-- + This template file contains settings needed to run Apache Hadoop jobs + with Apache Ignite In-Memory Accelerator. + + You can replace '$HADOOP_HOME/etc/hadoop/mapred-site.xml' file with this one + to run jobs on localhost (local node can be a part of distributed cluster though). + To run jobs on remote host you have to change jobtracker address to the REST address + of any running Ignite node. + + Note that Ignite jars must be in Apache Hadoop client classpath to work + with this configuration. 
+ + Run script '$IGNITE_HOME/bin/setup-hadoop.{sh|bat}' for Apache Hadoop client setup. +--> + +<configuration> + <!-- + Framework name must be set to 'ignite'. + --> + <property> + <name>mapreduce.framework.name</name> + <value>ignite</value> + </property> + + <!-- + Job tracker address must be set to the REST address of any running Ignite node. + --> + <property> + <name>mapreduce.jobtracker.address</name> + <value>localhost:11211</value> + </property> + + <!-- Parameters for job tuning. --> + <!-- + <property> + <name>mapreduce.job.reduces</name> + <value>1</value> + </property> + + <property> + <name>mapreduce.job.maps</name> + <value>4</value> + </property> + --> + +</configuration> http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/modules/hadoop/docs/hadoop_readme.md ---------------------------------------------------------------------- diff --git a/modules/hadoop/docs/hadoop_readme.md b/modules/hadoop/docs/hadoop_readme.md new file mode 100644 index 0000000..8f73ede --- /dev/null +++ b/modules/hadoop/docs/hadoop_readme.md @@ -0,0 +1,135 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<center> + +</center> + +## 1. 
Apache Ignite In-Memory Accelerator For Apache Hadoop + +Ignite In-Memory Accelerator For Apache Hadoop is designed to deliver uncompromised performance for existing Apache +Hadoop 2.2 or above applications with zero code change as well as simplicity of installation and configuration across all the +supported platforms. + +## 2. Installation + +Ignite distribution comes in a ZIP file that simply needs to be unzipped. The Accelerator requires Apache Hadoop of +version 2.2 or above to be already installed on the system either using Apache Bigtop packages or manually (manual installation +just means that Apache Hadoop binary distribution must be unpacked somewhere on the system). In case of manual +installation `HADOOP_HOME` environment variable must point to the installation directory of Apache Hadoop. + +> **NOTE:** You do not need any Apache Hadoop processes to be started, you only need to deploy the Apache Hadoop +> distribution on your system. Nevertheless you can run Apache Hadoop jobs with Ignite Accelerator over HDFS, +> in this case up and running HDFS infrastructure will be needed. + +The Accelerator comes with command line setup tool `bin/setup-hadoop.sh` (`bin/setup-hadoop.bat` on Windows) which +will guide you through all the needed setup steps (note that the setup tool will require write permissions to the +Apache Hadoop installation directory). + +Installation requirements: + +1. Windows, Linux, or MacOS environment. +2. Java 7 or 8 (latest update is advisable). +3. Point `JAVA_HOME` environment variable to your JDK or JRE installation. +4. Apache Hadoop 2.2 or above installed. +5. Point `HADOOP_HOME` environment variable to the installation directory of Apache Hadoop. +6. Run `bin/setup-hadoop.{sh|bat}` setup script and follow instructions. + +> **NOTE:** On Windows platform Apache Hadoop client requires `JAVA_HOME` path to not contain space characters. 
+> Java installed to `C:\\Program Files\` will not work, install JRE to correct location and point `JAVA_HOME` there. + +### 2.1 Check Apache Ignite Installation + +After setup script successfully completed, you can execute the Ignite startup script. +The following command will startup Ignite node with default configuration using multicast node discovery. + + bin/ignite.{sh|bat} + +If Ignite was installed successfully, the output from above commands should produce no exceptions or errors. +Note that you may see some other warnings during startup, but this is OK as they are meant to inform that certain +functionality is turned on or off by default. + +You can execute the above commands multiple times on the same machine and make sure that nodes discover each other. +Here is an example of log printout when 2 nodes join topology: + + ... Topology snapshot [nodes=2, CPUs=8, hash=0xD551B245] + +You can also start Ignite Management Console, called Visor, and observe started nodes. To startup Visor, you should execute the following script: + + /bin/ignitevisorcmd.{sh|bat} + +## 3. Configuration + +To configure Ignite nodes you can change configuration files at `config` directory of Ignite installation. Those are conventional Spring files. Please refer to shipped configuration files and Ignite javadocs for more details. + +### 3.1 Distributed File System Configuration + +Ignite has its own distributed in-memory file system called IgniteFS. Hadoop jobs can use it instead of HDFS to achieve maximum performance and scalability. Setting up IGFS is much simpler than HDFS, it requires just a few tweaks of Ignite node configuration and does not require starting any additional processes. Default configuration shipped with the Accelerator contains one configured instance named "ignitefs" which can be used as reference. 
+ +Generally URI for IgniteFS which will be used by Apache Hadoop looks like: + + igfs://igfs_name@host_name + +Where `igfs_name` is IgniteFS instance name, `host_name` is any host running Ignite node with that IgniteFS instance configured. +For more details please refer to IgniteFS documentation. + +### 3.2 Apache Hadoop Client Configuration + +To run Apache Hadoop jobs with Ignite cluster you need to configure `core-site.xml` and `mapred-site.xml` at +`$HADOOP_HOME/etc/hadoop` directory the same way as it is done in templates shipped with the Accelerator. +The setup tool `bin/setup-hadoop.{sh|bat}` will ask you to replace those files with Ignite templates or +you can find these templates at `config/hadoop/core-site.ignite.xml` and `config/hadoop/mapred-site.ignite.xml` +respectively and perform the needed configuration manually. + +Apache Hadoop client will need to have Ignite jar files in classpath, the setup tool will take care of that as well. + +## 4. Running Apache Hadoop Job With Ignite In-Memory Accelerator + +To run Apache Hadoop job with Ignite cluster you have to start one or multiple Ignite nodes and make sure they successfully discovered each other. + +When all the configuration is complete and Ignite nodes are started, running Apache Hadoop job will be the same as with conventional Apache Hadoop distribution except that all Ignite nodes are equal and any of them can be treated as Job Tracker and DFS Name Node. + +To run "Word Count" example you can load some text files to IGFS using standard Apache Hadoop tools: + + cd $HADOOP_HOME/bin + + ./hadoop fs -mkdir /input + + ./hadoop fs -copyFromLocal $HADOOP_HOME/README.txt /input/WORD_COUNT_ME.txt + +Run the job: + + ./hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/*-mapreduce-examples-*.jar wordcount /input /output + +Check results: + + ./hadoop fs -ls /output + + ./hadoop fs -cat /output/part-r-00000 + +A job can be run on multiple nodes on localhost or in cluster environment the same way.
The only changes needed to +switch Apache Hadoop client to a cluster are to fix the host in the default DFS URI in `core-site.xml` and the host in the job tracker +address in `mapred-site.xml`. + +## 5. Management & Monitoring with Visor +Ignite comes with a CLI (command line) based DevOps Management Console, called Visor, delivering an advanced set of management and monitoring capabilities. + +To start Visor in console mode you should execute the following command: + + `bin/ignitevisorcmd.sh` + +On Windows, run the same command with the `.bat` extension. \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/modules/hadoop/docs/hadoop_readme.pdf ---------------------------------------------------------------------- diff --git a/modules/hadoop/docs/hadoop_readme.pdf b/modules/hadoop/docs/hadoop_readme.pdf new file mode 100644 index 0000000..d56d950 Binary files /dev/null and b/modules/hadoop/docs/hadoop_readme.pdf differ http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/HadoopSetup.java ---------------------------------------------------------------------- diff --git a/modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/HadoopSetup.java b/modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/HadoopSetup.java index c37551e..c33c1f1 100644 --- a/modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/HadoopSetup.java +++ b/modules/hadoop/src/main/java/org/apache/ignite/internal/processors/hadoop/HadoopSetup.java @@ -231,17 +231,19 @@ public class HadoopSetup { File hadoopEtc = new File(hadoopDir, "etc" + File.separator + "hadoop"); - File igniteDocs = new File(igniteHome, "docs"); + File igniteHadoopCfg = igniteHadoopConfig(igniteHome); - if (!igniteDocs.canRead()) - exit("Failed to read Ignite 'docs' folder at '" + igniteDocs.getAbsolutePath() + "'.", null); + if (!igniteHadoopCfg.canRead()) + 
exit("Failed to read Ignite Hadoop 'config' folder at '" + igniteHadoopCfg.getAbsolutePath() + "'.", null); if (hadoopEtc.canWrite()) { // TODO Bigtop if (ask("Replace 'core-site.xml' and 'mapred-site.xml' files with preconfigured templates " + "(existing files will be backed up)?")) { - replaceWithBackup(new File(igniteDocs, "core-site.ignite.xml"), new File(hadoopEtc, "core-site.xml")); + replaceWithBackup(new File(igniteHadoopCfg, "core-site.ignite.xml"), + new File(hadoopEtc, "core-site.xml")); - replaceWithBackup(new File(igniteDocs, "mapred-site.ignite.xml"), new File(hadoopEtc, "mapred-site.xml")); + replaceWithBackup(new File(igniteHadoopCfg, "mapred-site.ignite.xml"), + new File(hadoopEtc, "mapred-site.xml")); } else println("Ok. You can configure them later, the templates are available at Ignite's 'docs' directory..."); @@ -254,7 +256,8 @@ public class HadoopSetup { warn("Can not write to '" + hiveConfDir.getAbsolutePath() + "'. To run Hive queries you have to " + "configure 'hive-site.xml' manually. The template is available at Ignite's 'docs' directory."); else if (ask("Replace 'hive-site.xml' with preconfigured template (existing file will be backed up)?")) - replaceWithBackup(new File(igniteDocs, "hive-site.ignite.xml"), new File(hiveConfDir, "hive-site.xml")); + replaceWithBackup(new File(igniteHadoopCfg, "hive-site.ignite.xml"), + new File(hiveConfDir, "hive-site.xml")); else println("Ok. You can configure it later, the template is available at Ignite's 'docs' directory..."); } @@ -263,6 +266,24 @@ public class HadoopSetup { } /** + * Get Ignite Hadoop config directory. + * + * @param igniteHome Ignite home. + * @return Ignite Hadoop config directory. 
+ */ + private static File igniteHadoopConfig(String igniteHome) { + Path path = Paths.get(igniteHome, "modules", "hadoop", "config"); + + if (!Files.exists(path)) + path = Paths.get(igniteHome, "config", "hadoop"); + + if (Files.exists(path)) + return path.toFile(); + else + return new File(igniteHome, "docs"); + } + + /** * @param jarFiles Jars. * @param folder Folder. */ http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/a9fc9b6b/modules/hadoop/src/test/java/org/apache/ignite/internal/processors/hadoop/HadoopCommandLineTest.java ---------------------------------------------------------------------- diff --git a/modules/hadoop/src/test/java/org/apache/ignite/internal/processors/hadoop/HadoopCommandLineTest.java b/modules/hadoop/src/test/java/org/apache/ignite/internal/processors/hadoop/HadoopCommandLineTest.java index 33fa358..f2f9467 100644 --- a/modules/hadoop/src/test/java/org/apache/ignite/internal/processors/hadoop/HadoopCommandLineTest.java +++ b/modules/hadoop/src/test/java/org/apache/ignite/internal/processors/hadoop/HadoopCommandLineTest.java @@ -156,9 +156,9 @@ public class HadoopCommandLineTest extends GridCommonAbstractTest { testWorkDir = Files.createTempDirectory("hadoop-cli-test").toFile(); - U.copy(U.resolveIgnitePath("docs/core-site.ignite.xml"), new File(testWorkDir, "core-site.xml"), false); + U.copy(resolveHadoopConfig("core-site.ignite.xml"), new File(testWorkDir, "core-site.xml"), false); - File srcFile = U.resolveIgnitePath("docs/mapred-site.ignite.xml"); + File srcFile = resolveHadoopConfig("mapred-site.ignite.xml"); File dstFile = new File(testWorkDir, "mapred-site.xml"); try (BufferedReader in = new BufferedReader(new FileReader(srcFile)); @@ -184,6 +184,18 @@ public class HadoopCommandLineTest extends GridCommonAbstractTest { generateHiveTestFiles(); } + /** + * Resolve Hadoop configuration file. + * + * @param name File name. + * @return Resolve file. 
+ */ + private static File resolveHadoopConfig(String name) { + File path = U.resolveIgnitePath("modules/hadoop/config/" + name); + + return path != null ? path : U.resolveIgnitePath("config/hadoop/" + name); + } + /** {@inheritDoc} */ @Override protected void afterTestsStopped() throws Exception { super.afterTestsStopped();