Jekyll build from master:f261f85 Add post about running on Fedora 25
Project: http://git-wip-us.apache.org/repos/asf/accumulo-website/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo-website/commit/ac9e7868 Tree: http://git-wip-us.apache.org/repos/asf/accumulo-website/tree/ac9e7868 Diff: http://git-wip-us.apache.org/repos/asf/accumulo-website/diff/ac9e7868 Branch: refs/heads/asf-site Commit: ac9e7868f262eaab233759764d072b0f1d61716f Parents: b866c15 Author: Christopher Tubbs <ctubb...@apache.org> Authored: Mon Dec 19 18:41:55 2016 -0500 Committer: Christopher Tubbs <ctubb...@apache.org> Committed: Mon Dec 19 18:41:55 2016 -0500 ---------------------------------------------------------------------- blog/2016/12/19/running-on-fedora-25.html | 532 +++++++++++++++++++++++ feed.xml | 571 ++++++++++++++++--------- index.html | 14 +- news/index.html | 8 + 4 files changed, 922 insertions(+), 203 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/ac9e7868/blog/2016/12/19/running-on-fedora-25.html ---------------------------------------------------------------------- diff --git a/blog/2016/12/19/running-on-fedora-25.html b/blog/2016/12/19/running-on-fedora-25.html new file mode 100644 index 0000000..9d6170d --- /dev/null +++ b/blog/2016/12/19/running-on-fedora-25.html @@ -0,0 +1,532 @@ +<!DOCTYPE html> +<html lang="en"> +<head> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<meta charset="utf-8"> +<meta http-equiv="X-UA-Compatible" content="IE=edge"> +<meta name="viewport" content="width=device-width, initial-scale=1"> +<link href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.7/paper/bootstrap.min.css" rel="stylesheet" integrity="sha384-awusxf8AUojygHf2+joICySzB780jVvQaVCAt1clU3QsyAitLGul28Qxb2r1e5g+" crossorigin="anonymous"> +<link href="//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css" rel="stylesheet"> +<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.css"> +<link href="/css/accumulo.css" rel="stylesheet" type="text/css"> + +<title>Running Accumulo on Fedora 25</title> + +<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script> +<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script> +<script type="text/javascript" src="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.js"></script> +<script> + // show location of canonical site if not currently on the canonical site + $(function() { + var host = window.location.host; + if (typeof host !== 'undefined' && host !== 'accumulo.apache.org') { + $('#non-canonical').show(); + } + }); + + $(function() { + // decorate section headers with anchors + return $("h2, h3, h4, h5, h6").each(function(i, el) { + var $el, icon, id; + $el = $(el); + id = $el.attr('id'); + icon = '<i class="fa fa-link"></i>'; + if (id) { + return $el.append($("<a />").addClass("header-link").attr("href", "#" + id).html(icon)); + } + }); + }); + + // configure Google Analytics + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + })(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + if (ga.hasOwnProperty('loaded') && ga.loaded === true) { + ga('create', 'UA-50934829-1', 'apache.org'); + ga('send', 'pageview'); + } +</script> + +</head> +<body style="padding-top: 100px"> + + <nav class="navbar navbar-default navbar-fixed-top"> + <div class="container"> + <div class="navbar-header"> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target="#navbar-items"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a href="/"><img id="nav-logo" alt="Apache Accumulo" class="img-responsive" src="/images/accumulo-logo.png" width="200"/></a> + </div> + <div class="collapse navbar-collapse" id="navbar-items"> + <ul class="nav navbar-nav"> + <li class="nav-link"><a href="/downloads">Download</a></li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Releases<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="/release/accumulo-1.8.0/">1.8.0 (Latest)</a></li> + <li><a href="/release/accumulo-1.7.2/">1.7.2</a></li> + <li><a href="/release/accumulo-1.6.6/">1.6.6</a></li> + <li><a href="/release/">Archive</a></li> + </ul> + </li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="/1.8/accumulo_user_manual.html">User Manual (1.8)</a></li> + <li><a 
href="/1.8/apidocs">Javadocs (1.8)</a></li> + <li><a href="/1.8/examples">Examples (1.8)</a></li> + <li><a href="/features">Features</a></li> + <li><a href="/glossary">Glossary</a></li> + <li><a href="/external-docs">External Docs</a></li> + <li><a href="/docs-archive/">Archive</a></li> + </ul> + </li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Community<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="/get_involved">Get Involved</a></li> + <li><a href="/mailing_list">Mailing Lists</a></li> + <li><a href="/people">People</a></li> + <li><a href="/related-projects">Related Projects</a></li> + <li><a href="/contributor/">Contributor Guide</a></li> + </ul> + </li> + </ul> + <ul class="nav navbar-nav navbar-right"> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Apache Software Foundation<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="https://www.apache.org">Apache Homepage <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/licenses/LICENSE-2.0">License <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/sponsorship">Sponsorship <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/security">Security <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/thanks">Thanks <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/policies/conduct">Code of Conduct <i class="fa fa-external-link"></i></a></li> + </ul> + </li> + </ul> + </div> + </div> +</nav> + + + <div class="container"> + <div class="row"> + <div class="col-md-12"> + + <div id="non-canonical" style="display: none; background-color: #F0E68C; padding-left: 1em;"> + Visit the official site at: <a href="https://accumulo.apache.org">https://accumulo.apache.org</a> + </div> + <div id="content"> + + <h1 class="title">Running Accumulo on Fedora 25</h1> + + <table> +<tr><td><b>Author</b></td><td> Christopher Tubbs and Mike Miller </td></tr> +<tr><td><b>Date </b></td><td> 19 Dec 2016 </td></tr> +<tr><td><b>Reviewer(s) </b></td><td> Keith Turner, Mike Walch </td></tr> +</table> +<p> + +<p>Apache Accumulo has been available in <a href="https://getfedora.org/">Fedora</a> since F20. Recently, the Fedora +packages have been updated to Accumulo version <code class="highlighter-rouge">1.6.6</code> and have made some +improvements to the default configuration and launch scripts to provide a good +out-of-box experience. This post will discuss the basic setup procedures for +running Accumulo in the latest version, <code class="highlighter-rouge">Fedora 25</code>.</p> + +<h2 id="prepare-the-system">Prepare the system</h2> + +<p><strong>WARNING</strong>: Before you start, be sure youâve got plenty of free disk space. +Otherwise, you could run into this <a href="https://bugzilla.redhat.com/show_bug.cgi?id=1404888">bug</a> or see other problems.</p> + +<p>These instructions will assume youâre using Fedora 25, fully up-to-date (<code class="highlighter-rouge">sudo +dnf --refresh upgrade</code>).</p> + +<h3 id="install-packages">Install packages</h3> + +<p>Fedora provides a meta-package to install Accumulo and all of its dependencies. 
+<p>It's a good idea to install the JDK, so you'll have access to the <code class="highlighter-rouge">jps</code> +command, and <code class="highlighter-rouge">tuned</code> for setting system performance tuning parameters from a +profile. It's also a good idea to ensure the optional Hadoop native libraries +are installed, and that you have a good editor (replace <code class="highlighter-rouge">vim</code> with your preferred +editor):</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo dnf install accumulo java-1.8.0-openjdk-devel tuned vim hadoop-common-native +</code></pre> +</div> + +<p>It is possible to install only a specific Accumulo service. For the single node +setup, almost everything is needed. For the multi-node setup, it might make +more sense to be selective about which you choose to install on each node (for +example, to only install <code class="highlighter-rouge">accumulo-tserver</code>).</p> + +<h3 id="set-up-tuned">Set up tuned</h3> + +<p>(Optional) <code class="highlighter-rouge">tuned</code> can optimize your server settings, adjusting things like +your <code class="highlighter-rouge">vm.swappiness</code>. To set up <code class="highlighter-rouge">tuned</code>, do:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start tuned.service <span class="c"># start service</span> +sudo tuned-adm profile network-latency <span class="c"># pick a good profile</span> +sudo tuned-adm active <span class="c"># verify the selected profile</span> +sudo systemctl <span class="nb">enable </span>tuned.service <span class="c"># auto-start on reboots</span> +</code></pre> +</div> + +<h3 id="set-up-zookeeper">Set up ZooKeeper</h3> + +<p>You'll need to set up ZooKeeper, regardless of whether you'll be running a +single node or many. So, let's create its configuration file (the defaults are +fine):</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo cp /etc/zookeeper/zoo_sample.cfg /etc/zookeeper/zoo.cfg +</code></pre> +</div> + +<p>Now, let's start ZooKeeper (and set it to run on reboot):</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start zookeeper.service +sudo systemctl <span class="nb">enable </span>zookeeper.service +</code></pre> +</div> + +<p>Note that the default port for ZooKeeper is <code class="highlighter-rouge">2181</code>. Remember the hostname of +the node where ZooKeeper is running, referred to as <code class="highlighter-rouge"><zk-dns-name></code> later.</p> + +<h2 id="running-a-single-node">Running a single node</h2> + +<h3 id="configure-accumulo">Configure Accumulo</h3> + +<p>To run on a single node, you don't need to run HDFS. Accumulo can use the local +filesystem as a volume instead. By default, it uses <code class="highlighter-rouge">/tmp/accumulo</code>.</p>
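+ +<p>For reference, that default corresponds to a property stanza in <code class="highlighter-rouge">accumulo-site.xml</code> roughly like the following (a sketch of just the relevant part of the file):</p> + +<div class="language-xml highlighter-rouge"><pre class="highlight"><code>  <span class="nt"><property></span> +    <span class="nt"><name></span>instance.volumes<span class="nt"></name></span> +    <span class="nt"><value></span>file:///tmp/accumulo<span class="nt"></value></span> +  <span class="nt"></property></span> +</code></pre> +</div> +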
+<p>Let's change that to something which will survive a reboot:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo vim /etc/accumulo/accumulo-site.xml +</code></pre> +</div> + +<p>Change the value of the <code class="highlighter-rouge">instance.volumes</code> property from <code class="highlighter-rouge">file:///tmp/accumulo</code> +to <code class="highlighter-rouge">file:///var/tmp/accumulo</code> in the configuration file (or another preferred +location).</p> + +<p>While you are editing the Accumulo configuration file, you should also change +the default <code class="highlighter-rouge">instance.secret</code> from <code class="highlighter-rouge">DEFAULT</code> to something else. You can +change the credentials used by the <code class="highlighter-rouge">tracer</code> service now, too. If you use the +<code class="highlighter-rouge">root</code> user, you'll have to set its password to the same one you'll use later +when you initialize Accumulo. If you use another user name, you'll have to +create that user later.</p> + +<h3 id="configure-hadoop-client">Configure Hadoop client</h3> + +<p>Hadoop's default local filesystem handler isn't very good at ensuring files are +written to disk when services are stopped. So, let's use a better filesystem +implementation for <code class="highlighter-rouge">file://</code> locations. This implementation may not be as +robust as a full HDFS instance, but it's more reliable than the default. Even +though you're not going to be running HDFS, the Hadoop client code used in +Accumulo can still be configured by modifying Hadoop's configuration file:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo vim /etc/hadoop/core-site.xml +</code></pre> +</div> + +<p>Add a new property:</p> + +<div class="language-xml highlighter-rouge"><pre class="highlight"><code> <span class="nt"><property></span> + <span class="nt"><name></span>fs.file.impl<span class="nt"></name></span> + <span class="nt"><value></span>org.apache.hadoop.fs.RawLocalFileSystem<span class="nt"></value></span> + <span class="nt"></property></span> +</code></pre> +</div> + +<h3 id="initialize-accumulo">Initialize Accumulo</h3> + +<p>Now, initialize Accumulo. You'll need to do this as the <code class="highlighter-rouge">accumulo</code> user, +because the Accumulo services run as the <code class="highlighter-rouge">accumulo</code> user. This user is created +automatically by the RPMs if it doesn't exist when the RPMs are installed. If +you already have a user and/or group by this name, it will probably not be a +problem, but be aware that this user will have permissions for the server +configuration files.</p>
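+ +<p>If you want to confirm whether such an account already exists before initializing, either of these standard commands will show it:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>getent passwd accumulo <span class="c"># prints the accumulo account entry, if any</span> +id accumulo <span class="c"># shows its uid, gid, and group membership</span> +</code></pre> +</div> +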
+<p>To initialize Accumulo as a specific user, use <code class="highlighter-rouge">sudo -u</code>:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo -u accumulo accumulo init +</code></pre> +</div> + +<p>As expected, this command will fail if ZooKeeper is not running, or if the +destination volume (<code class="highlighter-rouge">file:///var/tmp/accumulo</code>) already exists.</p> + +<h3 id="start-accumulo-services">Start Accumulo services</h3> + +<p>Now that Accumulo is initialized, you can start its services:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start accumulo-<span class="o">{</span>master,tserver,gc,tracer,monitor<span class="o">}</span>.service +</code></pre> +</div> + +<p>Enable the services to start at boot:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl <span class="nb">enable </span>accumulo-<span class="o">{</span>master,tserver,gc,tracer,monitor<span class="o">}</span>.service +</code></pre> +</div> + +<h2 id="running-multiple-nodes">Running multiple nodes</h2> + +<h3 id="amazon-ec2-setup">Amazon EC2 setup</h3> + +<p>For a multi-node setup, the authors tested these instructions with a Fedora 25 +Cloud AMI on Amazon EC2 with the following characteristics:</p> + +<ul> + <li><code class="highlighter-rouge">us-east-1</code> region</li> + <li><code class="highlighter-rouge">ami-e5757bf2</code> (latest in <code class="highlighter-rouge">us-east-1</code> at time of writing)</li> + <li><code class="highlighter-rouge">HVM</code> virtualization type</li> + <li><code class="highlighter-rouge">gp2</code> disk type</li> + <li><code class="highlighter-rouge">64GB EBS</code> root volume (no additional storage)</li> + <li><code class="highlighter-rouge">m4.large</code> and <code class="highlighter-rouge">m4.xlarge</code> instance types (tested on both)</li> + <li><code class="highlighter-rouge">3</code> nodes</li> +</ul> + +<p>For this setup, you should have a name service configured properly. For +convenience, we used the EC2-provided internal DNS, with internal IP addresses. +Make sure the nodes can communicate with each other using these names. If +you're using EC2, this means making sure they are in the same security group, +and the security group has an inbound rule for "All traffic" with the source +set to itself (<code class="highlighter-rouge">sg-xxxxxxxx</code>).</p> + +<p>The default user is <code class="highlighter-rouge">fedora</code> for the Fedora Cloud AMIs. For the best +experience, don't forget to make sure the nodes are fully up-to-date (<code class="highlighter-rouge">sudo dnf +--refresh upgrade</code>).</p> + +<h3 id="configure-and-run-hadoop">Configure and run Hadoop</h3> + +<p>Configuring HDFS is the primary difference between the single and multi-node +setup. For both Hadoop and Accumulo, you can edit the configuration files on +one machine, and copy them to the others.</p> + +<p>Pick a server to be the NameNode and identify its DNS name +(<code class="highlighter-rouge"><namenode-dns-name></code>).</p>
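+ +<p>On EC2, one way to find an instance's internal DNS name is from the instance itself; the second command below assumes the standard EC2 instance metadata endpoint is reachable:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>hostname -f <span class="c"># fully-qualified internal hostname</span> +curl -s http://169.254.169.254/latest/meta-data/local-hostname <span class="c"># same, from EC2 instance metadata</span> +</code></pre> +</div> +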
+<p>Edit Hadoop's configuration to set the default +filesystem name to this location:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo vim /etc/hadoop/core-site.xml +</code></pre> +</div> + +<p>Set the value for the property <code class="highlighter-rouge">fs.default.name</code> to +<code class="highlighter-rouge">hdfs://<namenode-dns-name>:8020</code>.</p> + +<p>Distribute copies of the changed configuration files to each node.</p> + +<p>Now, format the NameNode. You'll need to do this as the <code class="highlighter-rouge">hdfs</code> user on the +NameNode instance:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo -u hdfs hdfs namenode -format +</code></pre> +</div> + +<p>On the NameNode, start the NameNode service and enable it on reboot:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start hadoop-namenode.service +sudo systemctl <span class="nb">enable </span>hadoop-namenode.service +</code></pre> +</div> + +<p>On each DataNode, start the DataNode service and enable it on reboot:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start hadoop-datanode.service +sudo systemctl <span class="nb">enable </span>hadoop-datanode.service +</code></pre> +</div> + +<h3 id="configure-and-run-accumulo">Configure and run Accumulo</h3> + +<p>Update Accumulo's configuration to use this HDFS filesystem:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo vim /etc/accumulo/accumulo-site.xml +</code></pre> +</div> + +<p>Change the value of the <code class="highlighter-rouge">instance.volumes</code> property to +<code class="highlighter-rouge">hdfs://<namenode-dns-name>:8020/accumulo</code> in the configuration file. Don't +forget to also change the default <code class="highlighter-rouge">instance.secret</code> and the trace user's +credentials, if necessary.</p>
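+ +<p>As a rough sketch, the volumes setting would then look something like this (with <code class="highlighter-rouge"><namenode-dns-name></code> replaced by your NameNode's actual DNS name):</p> + +<div class="language-xml highlighter-rouge"><pre class="highlight"><code>  <span class="nt"><property></span> +    <span class="nt"><name></span>instance.volumes<span class="nt"></name></span> +    <span class="nt"><value></span>hdfs://<namenode-dns-name>:8020/accumulo<span class="nt"></value></span> +  <span class="nt"></property></span> +</code></pre> +</div> +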
+<p>Also, since you will have multiple nodes, you cannot +use <code class="highlighter-rouge">localhost:2181</code> for ZooKeeper, so set <code class="highlighter-rouge">instance.zookeeper.host</code> to +<code class="highlighter-rouge"><zk-dns-name>:2181</code>.</p> + +<p>Distribute copies of the changed configuration files to each node.</p> + +<p>With HDFS now running, make sure Accumulo has permission to create its +directory in HDFS, and initialize Accumulo:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo -u hdfs hdfs dfs -chmod 777 / +sudo -u accumulo accumulo init +</code></pre> +</div> + +<p>After Accumulo has created its directory structure, you can change the +permissions on the HDFS root back to what they were:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo -u hdfs hdfs dfs -chmod 755 / +</code></pre> +</div> + +<p>Now, you can start Accumulo.</p> + +<p>On the NameNode, start all the Accumulo services and enable them on reboot:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start accumulo-<span class="o">{</span>master,tserver,gc,tracer,monitor<span class="o">}</span>.service +sudo systemctl <span class="nb">enable </span>accumulo-<span class="o">{</span>master,tserver,gc,tracer,monitor<span class="o">}</span>.service +</code></pre> +</div> + +<p>On each DataNode, start just the <code class="highlighter-rouge">tserver</code> and enable it on reboot:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start accumulo-tserver.service +sudo systemctl <span class="nb">enable </span>accumulo-tserver.service +</code></pre> +</div> + +<h2 id="watching-and-using-accumulo">Watching and using Accumulo</h2> + +<h3 id="run-the-shell">Run the shell</h3> + +<p>Run a shell as Accumulo's root user (the instance name and root password are +the ones you selected during the initialize step above):</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>accumulo shell -u root -zh <zk-dns-name>:2181 -zi <instanceName> +</code></pre> +</div> + +<h3 id="view-the-monitor-pages">View the monitor pages</h3> + +<p>You should also be able to view the NameNode monitor page and the Accumulo +monitor pages. If you are running this in EC2, you can view these over an SSH +tunnel using the NameNode's public IP address. If you didn't give this node a +public IP address, you can allocate one in EC2 and associate it with this node:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>ssh -L50070:localhost:50070 -L50095:localhost:50095 <user>@<host> +</code></pre> +</div> + +<p>Replace <code class="highlighter-rouge"><user></code> with your username (probably <code class="highlighter-rouge">fedora</code> if using the Fedora +AMI), and <code class="highlighter-rouge"><host></code> with the public IP or hostname for your EC2 instance.
Now, +in your local browser, you should be able to navigate to these addresses on +your localhost: <a href="http://localhost:50070">Hadoop monitor (http://localhost:50070)</a> and <a href="http://localhost:50095">Accumulo +monitor (http://localhost:50095)</a>.</p> + +<h2 id="debugging-commands">Debugging commands</h2> + +<p>Check the status of a service:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl status <ServiceName>.service +</code></pre> +</div> + +<p>Check running Java processes:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo jps -ml +</code></pre> +</div> + +<p>Check the system logs for a specific service within the last 10 minutes:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo journalctl -u <ServiceName> --since <span class="s1">'10 minutes ago'</span> +</code></pre> +</div> + +<p>Check listening ports:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo netstat -tlnp +</code></pre> +</div> + +<p>Check the DNS name for a given IP address:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>getent hosts <ipaddress> <span class="c"># OR</span> +hostname -A +</code></pre> +</div> + +<p>Perform forward and reverse DNS lookups:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo dnf install <span class="nb">bind</span>-utils +dig +short <hostname> <span class="c"># forward DNS lookup</span> +dig +short -x <ipaddress> <span class="c"># reverse DNS lookup</span> +</code></pre> +</div> + +<p>Find the instance ID for your instance name:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>zkCli.sh -server <host>:2181 <span class="c"># replace <host> with your ZooKeeper server DNS name</span> +<span class="gp">> </span>get /accumulo/instances/<name> <span class="c"># replace <name> with your instance name</span> +<span class="gp">> </span>quit +</code></pre> +</div> + +<p>If the NameNode is listening on the loopback address, you'll probably need to +restart the service manually, as well as any Accumulo services which failed. +This is a <a href="https://bugzilla.redhat.com/show_bug.cgi?id=1406165">known issue with Hadoop</a>:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl restart hadoop-namenode.service +</code></pre> +</div> + +<p>Some helpful rpm commands:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>rpm -q -i <installed-package-name> <span class="c"># to see info about an installed package</span> +rpm -q -i -p <rpm-file-name> <span class="c"># to see info about an rpm file</span> +rpm -q --provides <installed-package-name> <span class="c"># see what a package provides</span> +rpm -q --requires <installed-package-name> <span class="c"># see what a package requires</span> +rpm -q -l <installed-package-name> <span class="c"># list package files</span> +rpm -q --whatprovides <file> <span class="c"># find rpm which owns <file></span> +rpm -q --whatrequires <span class="s1">'mvn(groupId:artifactId)'</span> <span class="c"># find rpm which requires maven coords</span> +</code></pre> +</div> + +<h2 id="helping-out">Helping out</h2> + +<p>Feel free to get involved with the <a href="https://fedoraproject.org/wiki/Join_the_package_collection_maintainers">Fedora</a> or <a href="https://fedoraproject.org/wiki/EPEL">Fedora EPEL</a> +(for RHEL/CentOS users) packaging.
Contact the Fedora <a href="https://admin.fedoraproject.org/pkgdb/package/rpms/accumulo/">maintainers</a> (user <code class="highlighter-rouge">at</code> +fedoraproject <code class="highlighter-rouge">dot</code> org) for the Accumulo packages to see how you can help +patching bugs, adapting the upstream packages to the Fedora packaging +standards, testing updates, maintaining dependency packages, and more.</p> + + + +<p><strong>View all posts in the <a href="/news">news archive</a></strong></p> + + </div> + + +<footer> + + <p><a href="https://www.apache.org"><img src="/images/feather-small.gif" alt="Apache Software Foundation" id="asf-logo" height="100" /></a></p> + + <p>Copyright © 2011-2016 The Apache Software Foundation. Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.</p> + +</footer> + + + </div> + </div> + </div> +</body> +</html> http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/ac9e7868/feed.xml ---------------------------------------------------------------------- diff --git a/feed.xml b/feed.xml index 7f7d62c..bc019fd 100644 --- a/feed.xml +++ b/feed.xml @@ -6,9 +6,381 @@ </description> <link>https://accumulo.apache.org/</link> <atom:link href="https://accumulo.apache.org/feed.xml" rel="self" type="application/rss+xml"/> - <pubDate>Wed, 14 Dec 2016 09:56:14 -0500</pubDate> - <lastBuildDate>Wed, 14 Dec 2016 09:56:14 -0500</lastBuildDate> - <generator>Jekyll v3.3.0</generator> + <pubDate>Mon, 19 Dec 2016 18:41:49 -0500</pubDate> + <lastBuildDate>Mon, 19 Dec 2016 18:41:49 -0500</lastBuildDate> + <generator>Jekyll v3.3.1</generator> + + <item> + <title>Running Accumulo on Fedora 25</title> + <description><p>Apache Accumulo has been available in <a href="https://getfedora.org/">Fedora</a> since F20. Recently, the Fedora +packages have been updated to Accumulo version <code class="highlighter-rouge">1.6.6</code> and have made some +improvements to the default configuration and launch scripts to provide a good +out-of-box experience. This post will discuss the basic setup procedures for +running Accumulo in the latest version, <code class="highlighter-rouge">Fedora 25</code>.</p> + +<h2 id="prepare-the-system">Prepare the system</h2> + +<p><strong>WARNING</strong>: Before you start, be sure youâve got plenty of free disk space. +Otherwise, you could run into this <a href="https://bugzilla.redhat.com/show_bug.cgi?id=1404888">bug</a> or see other problems.</p> + +<p>These instructions will assume youâre using Fedora 25, fully up-to-date (<code class="highlighter-rouge">sudo +dnf --refresh upgrade</code>).</p> + +<h3 id="install-packages">Install packages</h3> + +<p>Fedora provides a meta-package to install Accumulo and all of its dependencies. +Itâs a good idea to install the JDK, so youâll have access to the <code class="highlighter-rouge">jps</code> +command, and <code class="highlighter-rouge">tuned</code> for setting system performance tuning parameters from a +profile. Itâs also a good idea to ensure the optional hadoop native libraries +are installed, and you have a good editor (replace <code class="highlighter-rouge">vim</code> with your preferred +editor):</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo dnf install accumulo java-1.8.0-openjdk-devel tuned vim hadoop-common-native +</code></pre> +</div> + +<p>It is possible to install only a specific Accumulo service. For the single node +setup, almost everything is needed. 
For the multi-node setup, it might make +more sense to be selective about which you choose to install on each node (for +example, to only install <code class="highlighter-rouge">accumulo-tserver</code>).</p> + +<h3 id="set-up-tuned">Set up tuned</h3> + +<p>(Optional) <code class="highlighter-rouge">tuned</code> can optimize your server settings, adjusting things like +your <code class="highlighter-rouge">vm.swappiness</code>. To set up <code class="highlighter-rouge">tuned</code>, do:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start tuned.service <span class="c"># start service</span> +sudo tuned-adm profile network-latency <span class="c"># pick a good profile</span> +sudo tuned-adm active <span class="c"># verify the selected profile</span> +sudo systemctl <span class="nb">enable </span>tuned.service <span class="c"># auto-start on reboots</span> +</code></pre> +</div> + +<h3 id="set-up-zookeeper">Set up ZooKeeper</h3> + +<p>Youâll need to set up ZooKeeper, regardless of whether youâll be running a +single node or many. So, letâs create its configuration file (the defaults are +fine):</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo cp /etc/zookeeper/zoo_sample.cfg /etc/zookeeper/zoo.cfg +</code></pre> +</div> + +<p>Now, letâs start ZooKeeper (and set it to run on reboot):</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start zookeeper.service +sudo systemctl <span class="nb">enable </span>zookeeper.service +</code></pre> +</div> + +<p>Note that the default port for ZooKeeper is <code class="highlighter-rouge">2181</code>. Remember the hostname of +the node where ZooKeeper is running, referred to as <code class="highlighter-rouge">&lt;zk-dns-name&gt;</code> later.</p> + +<h2 id="running-a-single-node">Running a single node</h2> + +<h3 id="configure-accumulo">Configure Accumulo</h3> + +<p>To run on a single node, you donât need to run HDFS. Accumulo can use the local +filesystem as a volume instead. By default, it uses <code class="highlighter-rouge">/tmp/accumulo</code>. Letâs +change that to something which will survive a reboot:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo vim /etc/accumulo/accumulo-site.xml +</code></pre> +</div> + +<p>Change the value of the <code class="highlighter-rouge">instance.volumes</code> property from <code class="highlighter-rouge">file:///tmp/accumulo</code> +to <code class="highlighter-rouge">file:///var/tmp/accumulo</code> in the configuration file (or another preferred +location).</p> + +<p>While you are editing the Accumulo configuration file, you should also change +the default <code class="highlighter-rouge">instance.secret</code> from <code class="highlighter-rouge">DEFAULT</code> to something else. You can also +change the credentials used by the <code class="highlighter-rouge">tracer</code> service now, too. If you use the +<code class="highlighter-rouge">root</code> user, youâll have to set its password to the same one youâll use later +when you initialize Accumulo. If you use another user name, youâll have to +create that user later.</p> + +<h3 id="configure-hadoop-client">Configure Hadoop client</h3> + +<p>Hadoopâs default local filesystem handler isnât very good at ensuring files are +written to disk when services are stopped. So, letâs use a better filesystem +implementation for <code class="highlighter-rouge">file://</code> locations. 
This implementation may not be as +robust as a full HDFS instance, but itâs more reliable than the default. Even +though youâre not going to be running HDFS, the Hadoop client code used in +Accumulo can still be configured by modifying Hadoopâs configuration file:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo vim /etc/hadoop/core-site.xml +</code></pre> +</div> + +<p>Add a new property:</p> + +<div class="language-xml highlighter-rouge"><pre class="highlight"><code> <span class="nt">&lt;property&gt;</span> + <span class="nt">&lt;name&gt;</span>fs.file.impl<span class="nt">&lt;/name&gt;</span> + <span class="nt">&lt;value&gt;</span>org.apache.hadoop.fs.RawLocalFileSystem<span class="nt">&lt;/value&gt;</span> + <span class="nt">&lt;/property&gt;</span> +</code></pre> +</div> + +<h3 id="initialize-accumulo">Initialize Accumulo</h3> + +<p>Now, initialize Accumulo. Youâll need to do this as the <code class="highlighter-rouge">accumulo</code> user, +because the Accumulo services run as the <code class="highlighter-rouge">accumulo</code> user. This user is created +automatically by the RPMs if it doesnât exist when the RPMs are installed. If +you already have a user and/or group by this name, it will probably not be a +problem, but be aware that this user will have permissions for the server +configuration files. To initialize Accumulo as a specific user, use <code class="highlighter-rouge">sudo -u</code>:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo -u accumulo accumulo init +</code></pre> +</div> + +<p>As expected, this command will fail if ZooKeeper is not running, or if the +destination volume (<code class="highlighter-rouge">file:///var/tmp/accumulo</code>) already exists.</p> + +<h3 id="start-accumulo-services">Start Accumulo services</h3> + +<p>Now that Accumulo is initialized, you can start its services:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start accumulo-<span class="o">{</span>master,tserver,gc,tracer,monitor<span class="o">}</span>.service +</code></pre> +</div> + +<p>Enable the commands to start at boot:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl <span class="nb">enable </span>accumulo-<span class="o">{</span>master,tserver,gc,tracer,monitor<span class="o">}</span>.service +</code></pre> +</div> + +<h2 id="running-multiple-nodes">Running multiple nodes</h2> + +<h3 id="amazon-ec2-setup">Amazon EC2 setup</h3> + +<p>For a multi-node setup, the authors tested these instructions with a Fedora 25 +Cloud AMI on Amazon EC2 with the following characteristics:</p> + +<ul> + <li><code class="highlighter-rouge">us-east-1</code> availability zone</li> + <li><code class="highlighter-rouge">ami-e5757bf2</code> (latest in <code class="highlighter-rouge">us-east-1</code> at time of writing)</li> + <li><code class="highlighter-rouge">HVM</code> virtualization type</li> + <li><code class="highlighter-rouge">gp2</code> disk type</li> + <li><code class="highlighter-rouge">64GB EBS</code> root volume (no additional storage)</li> + <li><code class="highlighter-rouge">m4.large</code> and <code class="highlighter-rouge">m4.xlarge</code> instance types (tested on both)</li> + <li><code class="highlighter-rouge">3</code> nodes</li> +</ul> + +<p>For this setup, you should have a name service configured properly. For +convenience, we used the EC2 provided internal DNS, with internal IP addresses. 
+Make sure the nodes can communicate with each other using these names. If +youâre using EC2, this means making sure they are in the same security group, +and the security group has an inbound rule for âAll trafficâ with the source +set to itself (<code class="highlighter-rouge">sg-xxxxxxxx</code>).</p> + +<p>The default user is <code class="highlighter-rouge">fedora</code> for the Fedora Cloud AMIs. For the best +experience, donât forget to make sure they are fully up-to-date (<code class="highlighter-rouge">sudo dnf +--refresh upgrade</code>).</p> + +<h3 id="configure-and-run-hadoop">Configure and run Hadoop</h3> + +<p>Configuring HDFS is the primary difference between the single and multi-node +setup. For both Hadoop and Accumulo, you can edit the configuration files on +one machine, and copy them to the others.</p> + +<p>Pick a server to be the NameNode and identify its DNS name, +(<code class="highlighter-rouge">&lt;namenode-dns-name&gt;</code>). Edit Hadoopâs configuration to set the default +filesystem name to this location:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo vim /etc/hadoop/core-site.xml +</code></pre> +</div> + +<p>Set the value for the property <code class="highlighter-rouge">fs.default.name</code> to +<code class="highlighter-rouge">hdfs://&lt;namenode-dns-name&gt;:8020</code>.</p> + +<p>Distribute copies of the changed configuration files to each node.</p> + +<p>Now, format the NameNode. Youâll need to do this as the <code class="highlighter-rouge">hdfs</code> user on the +NameNode instance:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo -u hdfs hdfs namenode -format +</code></pre> +</div> + +<p>On the NameNode, start the NameNode service and enable it on reboot:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start hadoop-namenode.service +sudo systemctl <span class="nb">enable </span>hadoop-namenode.service +</code></pre> +</div> + +<p>On each DataNode, start the DataNode service:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start hadoop-datanode.service +sudo systemctl <span class="nb">enable </span>hadoop-datanode.service +</code></pre> +</div> + +<h3 id="configure-and-run-accumulo">Configure and run Accumulo</h3> + +<p>Update Accumuloâs configuration to use this HDFS filesystem:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo vim /etc/accumulo/accumulo-site.xml +</code></pre> +</div> + +<p>Change the value of the <code class="highlighter-rouge">instance.volumes</code> to +<code class="highlighter-rouge">hdfs://&lt;namenode-dns-name&gt;:8020/accumulo</code> in the configuration file. Donât +forget to also change the default <code class="highlighter-rouge">instance.secret</code> and the trace userâs +credentials, if necessary. 
Also, since you will have multiple nodes, you cannot +use <code class="highlighter-rouge">localhost:2181</code> for ZooKeeper, so set <code class="highlighter-rouge">instance.zookeeper.host</code> to +<code class="highlighter-rouge">&lt;zk-dns-name&gt;:2181</code>.</p> + +<p>Distribute copies of the changed configuration files to each node.</p> + +<p>With HDFS now running, make sure Accumulo has permission to create its +directory in HDFS, and initialize Accumulo:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo -u hdfs hdfs dfs -chmod 777 / +sudo -u accumulo accumulo init +</code></pre> +</div> + +<p>After Accumulo has created its directory structure, you can change the +permissions for the root back to what they were:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo -u hdfs hdfs dfs -chmod 755 / +</code></pre> +</div> + +<p>Now, you can start Accumulo.</p> + +<p>On the NameNode, start all the Accumulo services and enable on reboot:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start accumulo-<span class="o">{</span>master,tserver,gc,tracer,monitor<span class="o">}</span>.service +sudo systemctl <span class="nb">enable </span>accumulo-<span class="o">{</span>master,tserver,gc,tracer,monitor<span class="o">}</span>.service +</code></pre> +</div> + +<p>On each DataNode, start just the <code class="highlighter-rouge">tserver</code> and enable it on reboot:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl start accumulo-tserver.service +sudo systemctl <span class="nb">enable </span>accumulo-tserver.service +</code></pre> +</div> + +<h2 id="watching-and-using-accumulo">Watching and using Accumulo</h2> + +<h3 id="run-the-shell">Run the shell</h3> + +<p>Run a shell as Accumuloâs root user (the instance name and root password are +the ones you selected during the initialize step above:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>accumulo shell -u root -zh &lt;zk-dns-name&gt;:2181 -zi &lt;instanceName&gt; +</code></pre> +</div> + +<h3 id="view-the-monitor-pages">View the monitor pages</h3> + +<p>You should also be able to view the NameNode monitor page and the Accumulo +monitor pages. If you are running this in EC2, you can view these over an SSH +tunnel using the NameNodeâs public IP address. If you didnât give this node a +public IP address, you can allocate one in EC2 and associate it with this node:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>ssh -L50070:localhost:50070 -L50095:localhost:50095 &lt;user&gt;@&lt;host&gt; +</code></pre> +</div> + +<p>Replace <code class="highlighter-rouge">&lt;user&gt;</code> with your username (probably <code class="highlighter-rouge">fedora</code> if using the Fedora +AMI), and <code class="highlighter-rouge">&lt;host&gt;</code> with the public IP or hostname for your EC2 instance. 
Now, +in your local browser, you should be able to navigate to these addresses in +your localhost: <a href="http://localhost:50070">Hadoop monitor (http://localhost:50070)</a> and <a href="http://localhost:50095">Accumulo +monitor (http://localhost:50095)</a>.</p> + +<h2 id="debugging-commands">Debugging commands</h2> + +<p>Check the status of a service:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl status &lt;ServiceName&gt;.service +</code></pre> +</div> + +<p>Check running Java processes:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo jps -ml +</code></pre> +</div> + +<p>Check the system logs for a specific service within the last 10 minutes:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo journalctl -u &lt;ServiceName&gt; --since <span class="s1">'10 minutes ago'</span> +</code></pre> +</div> + +<p>Check listening ports:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo netstat -tlnp +</code></pre> +</div> + +<p>Check DNS name for a given IP address:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>getent hosts &lt;ipaddress&gt; <span class="c"># OR</span> +hostname -A +</code></pre> +</div> + +<p>Perform forward and reverse DNS lookups:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo dnf install <span class="nb">bind</span>-utils +dig +short &lt;hostname&gt; <span class="c"># forward DNS lookup</span> +dig +short -x &lt;ipaddress&gt; <span class="c"># reverse DNS lookup</span> +</code></pre> +</div> + +<p>Find the instance ID for your instance name:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>zkCli.sh -server &lt;host&gt;:2181 <span class="c"># replace &lt;host&gt; with your ZooKeeper server DNS name</span> +<span class="gp">&gt; </span>get /accumulo/instances/&lt;name&gt; <span class="c"># replace &lt;name&gt; with your instance name</span> +<span class="gp">&gt; </span>quit +</code></pre> +</div> + +<p>If the NameNode is listening on the loopback address, youâll probably need to +restart the service manually, as well as any Accumulo services which failed. 
+This is a <a href="https://bugzilla.redhat.com/show_bug.cgi?id=1406165">known issue with Hadoop</a>:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>sudo systemctl restart hadoop-namenode.service +</code></pre> +</div> + +<p>Some helpful rpm commands:</p> + +<div class="language-bash highlighter-rouge"><pre class="highlight"><code>rpm -q -i &lt;installed-package-name&gt; <span class="c"># to see info about an installed package</span> +rpm -q -i -p &lt;rpm-file-name&gt; <span class="c"># to see info about an rpm file</span> +rpm -q --provides &lt;installed-package-name&gt; <span class="c"># see what a package provides</span> +rpm -q --requires &lt;installed-package-name&gt; <span class="c"># see what a package requires</span> +rpm -q -l &lt;installed-package-name&gt; <span class="c"># list package files</span> +rpm -q --whatprovides &lt;file&gt; <span class="c"># find rpm which owns &lt;file&gt;</span> +rpm -q --whatrequires <span class="s1">'mvn(groupId:artifactId)'</span> <span class="c"># find rpm which requires maven coords</span> +</code></pre> +</div> + +<h2 id="helping-out">Helping out</h2> + +<p>Feel free to get involved with the <a href="https://fedoraproject.org/wiki/Join_the_package_collection_maintainers">Fedora</a> or <a href="https://fedoraproject.org/wiki/EPEL">Fedora EPEL</a> +(for RHEL/CentOS users) packaging. Contact the Fedora <a href="https://admin.fedoraproject.org/pkgdb/package/rpms/accumulo/">maintainers</a> (user <code class="highlighter-rouge">at</code> +fedoraproject <code class="highlighter-rouge">dot</code> org) for the Accumulo packages to see how you can help +patching bugs, adapting the upstream packages to the Fedora packaging +standards, testing updates, maintaining dependency packages, and more.</p> + +</description> + <pubDate>Mon, 19 Dec 2016 00:00:00 -0500</pubDate> + <link>https://accumulo.apache.org/blog/2016/12/19/running-on-fedora-25.html</link> + <guid isPermaLink="true">https://accumulo.apache.org/blog/2016/12/19/running-on-fedora-25.html</guid> + + + <category>blog</category> + + </item> <item> <title>Simpler scripts and configuration coming in Accumulo 2.0.0</title> @@ -1373,198 +1745,5 @@ HDFS High-Availability instances, forcing NameNode fail-over.</p> </item> - <item> - <title>Apache Accumulo 1.5.2</title> - <description><p>Apache Accumulo 1.5.2 is a maintenance release on the 1.5 version branch. -This release contains changes from over 100 issues, comprised of bug fixes -(client side and server side), new test cases, and updated Hadoop support -contributed by over 30 different contributors and committers.</p> - -<p>Below are resources for this release:</p> - -<ul> - <li><a href="/1.5/accumulo_user_manual.html">User Manual</a></li> - <li><a href="/1.5/apidocs">Javadocs</a></li> - <li><a href="/1.5/examples">Examples</a></li> -</ul> - -<p>As this is a maintenance release, Apache Accumulo 1.5.2 has no client API -incompatibilities over Apache Accumulo 1.5.0 and 1.5.1 and requires no manual upgrade -process. Users of 1.5.0 or 1.5.1 are strongly encouraged to update as soon as possible -to benefit from the improvements.</p> - -<p>Users who are new to Accumulo are encouraged to use a 1.6 release as opposed -to the 1.5 line as development has already shifted towards the 1.6 line. 
For those -who cannot or do not want to upgrade to 1.6, 1.5.2 is still an excellent choice -over earlier versions in the 1.5 line.</p> - -<h2 id="performance-improvements">Performance Improvements</h2> - -<p>Apache Accumulo 1.5.2 includes a number of performance-related fixes over previous versions.</p> - -<h3 id="write-ahead-log-sync-performance">Write-Ahead Log sync performance</h3> - -<p>The Write-Ahead Log (WAL) files are used to ensure durability of updates made to Accumulo. -A sync is called on the file in HDFS to make sure that the changes to the WAL are persisted -to disk, which allows Accumulo to recover in the case of failure. <a href="https://issues.apache.org/jira/browse/ACCUMULO-2766">ACCUMULO-2766</a> fixed -an issue where an operation against a WAL would unnecessarily wait for multiple syncs, slowing -down the ingest on the system.</p> - -<h3 id="minor-compactions-not-aggressive-enough">Minor-Compactions not aggressive enough</h3> - -<p>On a system with ample memory provided to Accumulo, long hold-times were observed which -blocks the ingest of new updates. Trying to free more server-side memory by running minor -compactions more frequently increased the overall throughput on the node. These changes -were made in <a href="https://issues.apache.org/jira/browse/ACCUMULO-2905">ACCUMULO-2905</a>.</p> - -<h3 id="heapiterator-optimization">HeapIterator optimization</h3> - -<p>Iterators, a notable feature of Accumulo, are provided to users as a server-side programming -construct, but are also used internally for numerous server operations. One of these system iterator -is the HeapIterator which implements a PriorityQueue of other Iterators. One way this iterator is -used is to merge multiple files in HDFS to present a single, sorted stream of Key-Value pairs. <a href="https://issues.apache.org/jira/browse/ACCUMULO-2827">ACCUMULO-2827</a> -introduces a performance optimization to the HeapIterator which can improve the speed of the -HeapIterator in common cases.</p> - -<h3 id="write-ahead-log-sync-implementation">Write-Ahead log sync implementation</h3> - -<p>In Hadoop-2, two implementations of sync are provided: hflush and hsync. Both of these -methods provide a way to request that the datanodes write the data to the underlying -medium and not just hold it in memory (the <em>fsync</em> syscall). While both of these methods -inform the Datanodes to sync the relevant block(s), <em>hflush</em> does not wait for acknowledgement -from the Datanodes that the sync finished, where <em>hsync</em> does. To provide the most reliable system -âout of the boxâ, Accumulo defaults to <em>hsync</em> so that your data is as secure as possible in -a variety of situations (notably, unexpected power outages).</p> - -<p>The downside is that performance tends to suffer because waiting for a sync to disk is a very -expensive operation. <a href="https://issues.apache.org/jira/browse/ACCUMULO-2842">ACCUMULO-2842</a> introduces a new system property, tserver.wal.sync.method, -that lets users to change the HDFS sync implementation from <em>hsync</em> to <em>hflush</em>. 
Using <em>hflush</em> instead -of <em>hsync</em> may result in about a 30% increase in ingest performance.</p> - -<p>For users upgrading from Hadoop-1 or Hadoop-0.20 releases, <em>hflush</em> is the equivalent of how -sync was implemented in these older versions of Hadoop and should give comparable performance.</p> - -<h3 id="server-side-mutation-queue-size">Server-side mutation queue size</h3> - -<p>When users desire writes to be as durable as possible, using <em>hsync</em>, the ingest performance -of the system can be improved by increasing the tserver.mutation.queue.max property. The cost -of this change is that it will cause TabletServers to use additional memory per writer. In 1.5.1, -the value of this parameter defaulted to a conservative 256K, which resulted in sub-par ingest -performance.</p> - -<p>1.5.2 and <a href="https://issues.apache.org/jira/browse/ACCUMULO-3018">ACCUMULO-3018</a> increases this buffer to 1M which has a noticeable positive impact on -ingest performance with a minimal increase in TabletServer memory usage.</p> - -<h2 id="notable-bug-fixes">Notable Bug Fixes</h2> - -<h3 id="fixes-mapreduce-package-name-change">Fixes MapReduce package name change</h3> - -<p>1.5.1 inadvertently included a change to RangeInputSplit which created an incompatibility -with 1.5.0. The original class has been restored to ensure that users accessing -the RangeInputSplit class do not have to alter their client code. See <a href="https://issues.apache.org/jira/browse/ACCUMULO-2586">ACCUMULO-2586</a> for -more information</p> - -<h3 id="add-configurable-maximum-frame-size-to-apache-thrift-proxy">Add configurable maximum frame size to Apache Thrift proxy</h3> - -<p>The Thrift proxy server was subject to memory exhaustion, typically -due to bad input, where the server would attempt to allocate a very large -buffer and die in the process. <a href="https://issues.apache.org/jira/browse/ACCUMULO-2658">ACCUMULO-2658</a> introduces a configuration -parameter, like <a href="https://issues.apache.org/jira/browse/ACCUMULO-2360">ACCUMULO-2360</a>, to prevent this error.</p> - -<h3 id="offline-tables-can-prevent-tablet-balancing">Offline tables can prevent tablet balancing</h3> - -<p>Before 1.5.2, when a table with many tablets was created, ingested into, and -taken offline, tablet balancing may have stoppped. This would happen if there -were tablet migrations for the table, because the migrations couldnât occur. -The balancer will not run when there are outstanding migrations; therefore, a -system could become unbalanced. <a href="https://issues.apache.org/jira/browse/ACCUMULO-2694">ACCUMULO-2694</a> introduces a fix to ensure -that offline tables do not block balancing and improves the server-side -logging.</p> - -<h3 id="miniaccumulocluster-process-management">MiniAccumuloCluster process management</h3> - -<p>MiniAccumuloCluster had a few issues which could cause deadlock or a method that -never returns. 
Most of these are related to management of the Accumulo processes -(<a href="https://issues.apache.org/jira/browse/ACCUMULO-2764">ACCUMULO-2764</a>, <a href="https://issues.apache.org/jira/browse/ACCUMULO-2985">ACCUMULO-2985</a>, and <a href="https://issues.apache.org/jira/browse/ACCUMULO-3055">ACCUMULO-3055</a>).</p> - -<h3 id="iteratorsettings-not-correctly-serialized-in-rangeinputsplit">IteratorSettings not correctly serialized in RangeInputSplit</h3> - -<p>The Writable interface methods on the RangeInputSplit class accidentally omitted -calls to serialize the IteratorSettings configured for the Job. <a href="https://issues.apache.org/jira/browse/ACCUMULO-2962">ACCUMULO-2962</a> -fixes the serialization and adds some additional tests.</p> - -<h3 id="constraint-violation-causes-hung-scans">Constraint violation causes hung scans</h3> - -<p>A failed bulk import transaction had the ability to create an infinitely retrying -loop due to a constraint violation. This directly prevents scans from completing, -but will also hang compactions. <a href="https://issues.apache.org/jira/browse/ACCUMULO-3096">ACCUMULO-3096</a> fixes the issue so that the -constraint no longer hangs the entire system.</p> - -<h2 id="documentation">Documentation</h2> - -<p>The following documentation updates were made:</p> - -<ul> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-2540">ACCUMULO-2540</a></li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-2767">ACCUMULO-2767</a></li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-2796">ACCUMULO-2796</a></li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-2443">ACCUMULO-2443</a></li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-3008">ACCUMULO-3008</a></li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-2919">ACCUMULO-2919</a></li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-2874">ACCUMULO-2874</a></li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-2653">ACCUMULO-2653</a></li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-2437">ACCUMULO-2437</a></li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-3097">ACCUMULO-3097</a></li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-2499">ACCUMULO-2499</a></li> - <li><a href="https://issues.apache.org/jira/browse/ACCUMULO-1669">ACCUMULO-1669</a></li> -</ul> - -<h2 id="testing">Testing</h2> - -<p>Each unit and functional test only runs on a single node, while the RandomWalk and Continuous Ingest tests run -on any number of nodes. 
<em>Agitation</em> refers to randomly restarting Accumulo processes and Hadoop Datanode processes, -and, in HDFS High-Availability instances, forcing NameNode failover.</p> - -<table id="release_notes_testing" class="table"> - <thead> - <tr> - <th>OS</th> - <th>Hadoop</th> - <th>Nodes</th> - <th>ZooKeeper</th> - <th>HDFS High-Availability</th> - <th>Tests</th> - </tr> - </thead> - <tbody> - <tr> - <td>Gentoo</td> - <td>Apache 2.6.0-SNAPSHOT</td> - <td>1</td> - <td>Apache 3.4.5</td> - <td>No</td> - <td>Unit and Functional Tests, ContinuousIngest w/ verification (1B entries)</td> - </tr> - <tr> - <td>CentOS 6</td> - <td>Apache 2.3.0</td> - <td>20</td> - <td>Apache 3.4.5</td> - <td>No</td> - <td>24/hr RandomWalk, 24/hr ContinuousIngest w/ verification w/ and w/o agitation (30B and 23B entries)</td> - </tr> - </tbody> -</table> - -</description> - <pubDate>Sat, 19 Sep 2015 00:00:00 -0400</pubDate> - <link>https://accumulo.apache.org/release/accumulo-1.5.2/</link> - <guid isPermaLink="true">https://accumulo.apache.org/release/accumulo-1.5.2/</guid> - - - <category>release</category> - - </item> - </channel> </rss> http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/ac9e7868/index.html ---------------------------------------------------------------------- diff --git a/index.html b/index.html index 5a33f31..8e02d71 100644 --- a/index.html +++ b/index.html @@ -157,6 +157,13 @@ <div class="row latest-news-item"> <div class="col-sm-12" style="margin-bottom: 5px"> + <span style="font-size: 12px; margin-right: 5px;">Dec 2016</span> + <a href="/blog/2016/12/19/running-on-fedora-25.html">Running Accumulo on Fedora 25</a> + </div> + </div> + + <div class="row latest-news-item"> + <div class="col-sm-12" style="margin-bottom: 5px"> <span style="font-size: 12px; margin-right: 5px;">Nov 2016</span> <a href="/blog/2016/11/16/simpler-scripts-and-config.html">Simpler scripts and configuration coming in Accumulo 2.0.0</a> </div> @@ -183,13 +190,6 @@ </div> </div> - <div class="row latest-news-item"> - <div class="col-sm-12" style="margin-bottom: 5px"> - <span style="font-size: 12px; margin-right: 5px;">Jun 2016</span> - <a href="/release/accumulo-1.7.2/">Apache Accumulo 1.7.2</a> - </div> - </div> - <div id="news-archive-link"> <p>View all posts in the <a href="/news">news archive</a></p> </div> http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/ac9e7868/news/index.html ---------------------------------------------------------------------- diff --git a/news/index.html b/news/index.html index f4b8344..3e0bc29 100644 --- a/news/index.html +++ b/news/index.html @@ -151,6 +151,14 @@ <h3>2016</h3> <div class="row" style="margin-top: 15px"> + <div class="col-md-1">Dec 19</div> + <div class="col-md-10"><a href="/blog/2016/12/19/running-on-fedora-25.html">Running Accumulo on Fedora 25</a></div> + </div> + + + + + <div class="row" style="margin-top: 15px"> <div class="col-md-1">Nov 16</div> <div class="col-md-10"><a href="/blog/2016/11/16/simpler-scripts-and-config.html">Simpler scripts and configuration coming in Accumulo 2.0.0</a></div> </div>