[03/13] accumulo-website git commit: Jekyll build from master:7cc70b2

mwalch Mon, 22 May 2017 10:32:23 -0700
http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/7b2eb317/docs/unreleased/getting-started/table_configuration.html
----------------------------------------------------------------------
diff --git a/docs/unreleased/getting-started/table_configuration.html 
b/docs/unreleased/getting-started/table_configuration.html
new file mode 100644
index 0000000..c2267ae
--- /dev/null
+++ b/docs/unreleased/getting-started/table_configuration.html
@@ -0,0 +1,1039 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<meta charset="utf-8">
+<meta http-equiv="X-UA-Compatible" content="IE=edge">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<link 
href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.7/paper/bootstrap.min.css"; 
rel="stylesheet" 
integrity="sha384-awusxf8AUojygHf2+joICySzB780jVvQaVCAt1clU3QsyAitLGul28Qxb2r1e5g+"
 crossorigin="anonymous">
+<link href="//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css" 
rel="stylesheet">
+<link rel="stylesheet" type="text/css" 
href="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.css";>
+<link href="/css/accumulo.css" rel="stylesheet" type="text/css">
+
+<title>Accumulo Documentation - Table Configuration</title>
+
+<script 
src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js";></script>
+<script 
src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js"; 
integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa"
 crossorigin="anonymous"></script>
+<script type="text/javascript" 
src="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.js";></script>
+<script>
+  // show location of canonical site if not currently on the canonical site
+  $(function() {
+    var host = window.location.host;
+    if (typeof host !== 'undefined' && host !== 'accumulo.apache.org') {
+      $('#non-canonical').show();
+    }
+  });
+
+  $(function() {
+    // decorate section headers with anchors
+    return $("h2, h3, h4, h5, h6").each(function(i, el) {
+      var $el, icon, id;
+      $el = $(el);
+      id = $el.attr('id');
+      icon = '<i class="fa fa-link"></i>';
+      if (id) {
+        return $el.append($("<a />").addClass("header-link").attr("href", "#" 
+ id).html(icon));
+      }
+    });
+  });
+  
+  // configure Google Analytics
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+  
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+  if (ga.hasOwnProperty('loaded') && ga.loaded === true) {
+    ga('create', 'UA-50934829-1', 'apache.org');
+    ga('send', 'pageview');
+  }
+</script>
+
+</head>
+<body style="padding-top: 100px">
+
+  <nav class="navbar navbar-default navbar-fixed-top">
+  <div class="container">
+    <div class="navbar-header">
+      <button type="button" class="navbar-toggle" data-toggle="collapse" 
data-target="#navbar-items">
+        <span class="sr-only">Toggle navigation</span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+        <span class="icon-bar"></span>
+      </button>
+      <a href="/"><img id="nav-logo" alt="Apache Accumulo" 
class="img-responsive" src="/images/accumulo-logo.png" width="200"
+        /></a>
+    </div>
+    <div class="collapse navbar-collapse" id="navbar-items">
+      <ul class="nav navbar-nav">
+        <li class="nav-link"><a href="/downloads">Download</a></li>
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" 
href="#">Releases<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/release/accumulo-1.8.1/">1.8.1 (Latest)</a></li>
+            <li><a href="/release/accumulo-1.7.3/">1.7.3</a></li>
+            <li><a href="/release/accumulo-1.6.6/">1.6.6</a></li>
+            <li><a href="/release/">Archive</a></li>
+          </ul>
+        </li>
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" 
href="#">Documentation<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/1.8/accumulo_user_manual.html">User Manual 
(1.8)</a></li>
+            <li><a href="/1.8/apidocs">Javadocs (1.8)</a></li>
+            <li><a href="/1.8/examples">Examples (1.8)</a></li>
+            <li><a href="/features">Features</a></li>
+            <li><a href="/glossary">Glossary</a></li>
+            <li><a href="/external-docs">External Docs</a></li>
+            <li><a href="/docs-archive/">Archive</a></li>
+          </ul>
+        </li>
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" 
href="#">Community<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="/get_involved">Get Involved</a></li>
+            <li><a href="/mailing_list">Mailing Lists</a></li>
+            <li><a href="/people">People</a></li>
+            <li><a href="/related-projects">Related Projects</a></li>
+            <li><a href="/contributor/">Contributor Guide</a></li>
+          </ul>
+        </li>
+      </ul>
+      <ul class="nav navbar-nav navbar-right">
+        <li class="dropdown">
+          <a class="dropdown-toggle" data-toggle="dropdown" href="#">Apache 
Software Foundation<span class="caret"></span></a>
+          <ul class="dropdown-menu">
+            <li><a href="https://www.apache.org";>Apache Homepage <i class="fa 
fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/licenses/LICENSE-2.0";>License 
<i class="fa fa-external-link"></i></a></li>
+            <li><a 
href="https://www.apache.org/foundation/sponsorship";>Sponsorship <i class="fa 
fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/security";>Security <i 
class="fa fa-external-link"></i></a></li>
+            <li><a href="https://www.apache.org/foundation/thanks";>Thanks <i 
class="fa fa-external-link"></i></a></li>
+            <li><a 
href="https://www.apache.org/foundation/policies/conduct";>Code of Conduct <i 
class="fa fa-external-link"></i></a></li>
+          </ul>
+        </li>
+      </ul>
+    </div>
+  </div>
+</nav>
+
+  <div class="container">
+    <div class="row">
+      <div class="col-md-12">
+
+        <div id="non-canonical" style="display: none; background-color: 
#F0E68C; padding-left: 1em;">
+          Visit the official site at: <a 
href="https://accumulo.apache.org";>https://accumulo.apache.org</a>
+        </div>
+        <div id="content">
+          
+          <div class="alert alert-danger" role="alert">This documentation is 
for an unreleased version of Apache Accumulo that is currently under 
development! Check out the <a href="/docs-1.8/">documentation for the latest 
release</a>.</div>
+
+<div class="row">
+  <div class="col-md-3">
+    <div class="panel-group" id="accordion" role="tablist" 
aria-multiselectable="true">
+      <div class="panel panel-default">
+      
+      
+      
+        
+          
+        
+          
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" 
data-parent="#accordion" href="#collapsegetting-started" aria-expanded="true" 
aria-controls="collapsegetting-started">
+                  Getting started
+                </a>
+              </h4>
+            </div>
+            <div id="collapsegetting-started" class="panel-collapse collapse 
in" role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/getting-started/design">Accumulo Design</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/getting-started/clients">Accumulo Clients</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/getting-started/shell">Accumulo Shell</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/getting-started/table_design">Table Design</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/getting-started/table_configuration">Table 
Configuration</a></div>
+                
+              </div>
+            </div>
+          
+        
+          
+        
+          
+        
+      
+        
+          
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" 
data-parent="#accordion" href="#collapsedevelopment" aria-expanded="false" 
aria-controls="collapsedevelopment">
+                  Development
+                </a>
+              </h4>
+            </div>
+            <div id="collapsedevelopment" class="panel-collapse collapse" 
role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/development/iterator_design">Iterator Design</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/development/iterator_testing">Iterator Testing</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/development/development_tools">Development 
Tools</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/development/sampling">Sampling</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/development/summaries">Summary Statistics</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/development/security">Security</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/development/high_speed_ingest">High-Speed 
Ingest</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/development/analytics">Analytics</a></div>
+                
+              </div>
+            </div>
+          
+        
+          
+        
+          
+        
+          
+        
+      
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" 
data-parent="#accordion" href="#collapseadministration" aria-expanded="false" 
aria-controls="collapseadministration">
+                  Administration
+                </a>
+              </h4>
+            </div>
+            <div id="collapseadministration" class="panel-collapse collapse" 
role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/administration/overview">Overview</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/administration/configuration-management">Configuration 
Management</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/administration/configuration-properties">Configuration 
Properties</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/administration/kerberos">Kerberos</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/administration/replication">Replication</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/administration/fate">FATE</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/administration/multivolume">Multi-Volume 
Installations</a></div>
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/administration/ssl">SSL</a></div>
+                
+              </div>
+            </div>
+          
+        
+          
+        
+          
+        
+          
+        
+          
+        
+      
+        
+          
+        
+          
+        
+          
+        
+          
+        
+          
+            <div class="panel-heading" role="tab" id="headingOne">
+              <h4 class="panel-title">
+                <a role="button" data-toggle="collapse" 
data-parent="#accordion" href="#collapsetroubleshooting" aria-expanded="false" 
aria-controls="collapsetroubleshooting">
+                  Troubleshooting
+                </a>
+              </h4>
+            </div>
+            <div id="collapsetroubleshooting" class="panel-collapse collapse" 
role="tabpanel" aria-labelledby="headingOne">
+              <div class="panel-body">
+                
+                
+                <div class="row doc-sidebar-link"><a 
href="/docs/unreleased/troubleshooting/overview">Overview</a></div>
+                
+              </div>
+            </div>
+          
+        
+      
+      </div>
+    </div>
+  </div>
+  <div class="col-md-9">
+    
+    <p><a href="/docs/unreleased/">Accumulo unreleased docs</a> 
&nbsp;&gt;&gt;&nbsp; Getting started &nbsp;&gt;&gt;&nbsp; Table 
Configuration</p>
+    
+    
+    <h1>Table Configuration</h1>
+    
+    <p>Accumulo tables have a few options that can be configured to alter the 
default
+behavior of Accumulo as well as improve performance based on the data stored.
+These include locality groups, constraints, bloom filters, iterators, and block
+cache.  See the <a href="/docs/unreleased/config/">configuration 
documentation</a> for a complete list of
+available configuration options.</p>
+
+<h2 id="locality-groups">Locality Groups</h2>
+
+<p>Accumulo supports storing sets of column families separately on disk to 
allow
+clients to efficiently scan over columns that are frequently used together and 
to avoid
+scanning over column families that are not requested. After a locality group 
is set,
+Scanner and BatchScanner operations will automatically take advantage of them
+whenever the fetchColumnFamilies() method is used.</p>
+
+<p>By default, tables place all column families into the same "default" 
locality group.
+Additional locality groups can be configured at any time via the shell or
+programmatically as follows:</p>
+
+<h3 id="managing-locality-groups-via-the-shell">Managing Locality Groups via 
the Shell</h3>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>usage: setgroups 
&lt;group&gt;=&lt;col fam&gt;{,&lt;col fam&gt;}{ &lt;group&gt;=&lt;col 
fam&gt;{,&lt;col fam&gt;}}
+    [-?] -t &lt;table&gt;
+
+user@myinstance mytable&gt; setgroups group_one=colf1,colf2 -t mytable
+
+user@myinstance mytable&gt; getgroups -t mytable
+</code></pre>
+</div>
+
+<h3 id="managing-locality-groups-via-the-client-api">Managing Locality Groups 
via the Client API</h3>
+
+<div class="language-java highlighter-rouge"><pre 
class="highlight"><code><span class="n">Connector</span> <span 
class="n">conn</span><span class="o">;</span>
+
+<span class="n">HashMap</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span><span class="n">Set</span><span 
class="o">&lt;</span><span class="n">Text</span><span class="o">&gt;&gt;</span> 
<span class="n">localityGroups</span> <span class="o">=</span> <span 
class="k">new</span> <span class="n">HashMap</span><span 
class="o">&lt;</span><span class="n">String</span><span class="o">,</span> 
<span class="n">Set</span><span class="o">&lt;</span><span 
class="n">Text</span><span class="o">&gt;&gt;();</span>
+
+<span class="n">HashSet</span><span class="o">&lt;</span><span 
class="n">Text</span><span class="o">&gt;</span> <span 
class="n">metadataColumns</span> <span class="o">=</span> <span 
class="k">new</span> <span class="n">HashSet</span><span 
class="o">&lt;</span><span class="n">Text</span><span class="o">&gt;();</span>
+<span class="n">metadataColumns</span><span class="o">.</span><span 
class="na">add</span><span class="o">(</span><span class="k">new</span> <span 
class="n">Text</span><span class="o">(</span><span 
class="s">"domain"</span><span class="o">));</span>
+<span class="n">metadataColumns</span><span class="o">.</span><span 
class="na">add</span><span class="o">(</span><span class="k">new</span> <span 
class="n">Text</span><span class="o">(</span><span class="s">"link"</span><span 
class="o">));</span>
+
+<span class="n">HashSet</span><span class="o">&lt;</span><span 
class="n">Text</span><span class="o">&gt;</span> <span 
class="n">contentColumns</span> <span class="o">=</span> <span 
class="k">new</span> <span class="n">HashSet</span><span 
class="o">&lt;</span><span class="n">Text</span><span class="o">&gt;();</span>
+<span class="n">contentColumns</span><span class="o">.</span><span 
class="na">add</span><span class="o">(</span><span class="k">new</span> <span 
class="n">Text</span><span class="o">(</span><span class="s">"body"</span><span 
class="o">));</span>
+<span class="n">contentColumns</span><span class="o">.</span><span 
class="na">add</span><span class="o">(</span><span class="k">new</span> <span 
class="n">Text</span><span class="o">(</span><span 
class="s">"images"</span><span class="o">));</span>
+
+<span class="n">localityGroups</span><span class="o">.</span><span 
class="na">put</span><span class="o">(</span><span 
class="s">"metadata"</span><span class="o">,</span> <span 
class="n">metadataColumns</span><span class="o">);</span>
+<span class="n">localityGroups</span><span class="o">.</span><span 
class="na">put</span><span class="o">(</span><span 
class="s">"content"</span><span class="o">,</span> <span 
class="n">contentColumns</span><span class="o">);</span>
+
+<span class="n">conn</span><span class="o">.</span><span 
class="na">tableOperations</span><span class="o">().</span><span 
class="na">setLocalityGroups</span><span class="o">(</span><span 
class="s">"mytable"</span><span class="o">,</span> <span 
class="n">localityGroups</span><span class="o">);</span>
+
+<span class="c1">// existing locality groups can be obtained as follows</span>
+<span class="n">Map</span><span class="o">&lt;</span><span 
class="n">String</span><span class="o">,</span> <span class="n">Set</span><span 
class="o">&lt;</span><span class="n">Text</span><span class="o">&gt;&gt;</span> 
<span class="n">groups</span> <span class="o">=</span>
+    <span class="n">conn</span><span class="o">.</span><span 
class="na">tableOperations</span><span class="o">().</span><span 
class="na">getLocalityGroups</span><span class="o">(</span><span 
class="s">"mytable"</span><span class="o">);</span>
+</code></pre>
+</div>
+
+<p>The assignment of Column Families to Locality Groups can be changed at any 
time. The
+physical movement of column families into their new locality groups takes 
place via
+the periodic Major Compaction process that takes place continuously in the
+background. Major Compaction can also be scheduled to take place immediately
+through the shell:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>user@myinstance 
mytable&gt; compact -t mytable
+</code></pre>
+</div>
+
+<h2 id="constraints">Constraints</h2>
+
+<p>Accumulo supports constraints applied on mutations at insert time. This can 
be
+used to disallow certain inserts according to a user defined policy. Any 
mutation
+that fails to meet the requirements of the constraint is rejected and sent 
back to the
+client.</p>
+
+<p>Constraints can be enabled by setting a table property as follows:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>user@myinstance 
mytable&gt; constraint -t mytable -a com.test.ExampleConstraint 
com.test.AnotherConstraint
+
+user@myinstance mytable&gt; constraint -l
+com.test.ExampleConstraint=1
+com.test.AnotherConstraint=2
+</code></pre>
+</div>
+
+<p>Currently there are no general-purpose constraints provided with the 
Accumulo
+distribution. New constraints can be created by writing a Java class that 
implements
+the following interface:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code> 
org.apache.accumulo.core.constraints.Constraint
+</code></pre>
+</div>
+
+<p>To deploy a new constraint, create a jar file containing the class 
implementing the
+new constraint and place it in the lib directory of the Accumulo installation. 
New
+constraint jars can be added to Accumulo and enabled without restarting but any
+change to an existing constraint class requires Accumulo to be restarted.</p>
+
+<p>See the <a 
href="https://github.com/apache/accumulo-examples/blob/master/docs/contraints.md";>constraints
 examples</a>
+for example code.</p>
+
+<h2 id="bloom-filters">Bloom Filters</h2>
+
+<p>As mutations are applied to an Accumulo table, several files are created 
per tablet. If
+bloom filters are enabled, Accumulo will create and load a small data 
structure into
+memory to determine whether a file contains a given key before opening the 
file.
+This can speed up lookups considerably.</p>
+
+<p>To enable bloom filters, enter the following command in the Shell:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>user@myinstance&gt; config -t mytable -s 
table.bloom.enabled=true
+</code></pre>
+</div>
+
+<p>The <a 
href="https://github.com/apache/accumulo-examples/blob/master/docs/bloom.md";>bloom
 filter examples</a>
+contain an extensive example of using Bloom Filters.</p>
+
+<h2 id="iterators">Iterators</h2>
+
+<p>Iterators provide a modular mechanism for adding functionality to be 
executed by
+TabletServers when scanning or compacting data. This allows users to 
efficiently
+summarize, filter, and aggregate data. In fact, the built-in features of 
cell-level
+security and column fetching are implemented using Iterators.
+Some useful Iterators are provided with Accumulo and can be found in the
+<em><code 
class="highlighter-rouge">org.apache.accumulo.core.iterators.user</code></em> 
package.
+In each case, any custom Iterators must be included in Accumulo's classpath,
+typically by including a jar in <code class="highlighter-rouge">lib/</code> or 
<code class="highlighter-rouge">lib/ext/</code>, although the VFS classloader
+allows for classpath manipulation using a variety of schemes including URLs 
and HDFS URIs.</p>
+
+<h3 id="setting-iterators-via-the-shell">Setting Iterators via the Shell</h3>
+
+<p>Iterators can be configured on a table at scan, minor compaction and/or 
major
+compaction scopes. If the Iterator implements the OptionDescriber interface, 
the
+setiter command can be used which will interactively prompt the user to provide
+values for the given necessary options.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>usage: setiter 
[-?] -ageoff | -agg | -class &lt;name&gt; | -regex |
+    -reqvis | -vers   [-majc] [-minc] [-n &lt;itername&gt;] -p &lt;pri&gt;
+    [-scan] [-t &lt;table&gt;]
+
+user@myinstance mytable&gt; setiter -t mytable -scan -p 15 -n myiter -class 
com.company.MyIterator
+</code></pre>
+</div>
+
+<p>The config command can always be used to manually configure iterators which 
is useful
+in cases where the Iterator does not implement the OptionDescriber 
interface.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>config -t mytable 
-s table.iterator.scan.myiter=15,com.company.MyIterator
+config -t mytable -s table.iterator.minc.myiter=15,com.company.MyIterator
+config -t mytable -s table.iterator.majc.myiter=15,com.company.MyIterator
+config -t mytable -s table.iterator.scan.myiter.opt.myoptionname=myoptionvalue
+config -t mytable -s table.iterator.minc.myiter.opt.myoptionname=myoptionvalue
+config -t mytable -s table.iterator.majc.myiter.opt.myoptionname=myoptionvalue
+</code></pre>
+</div>
+
+<p>Typically, a table will have multiple iterators. Accumulo configures a set 
of
+system level iterators for each table. These iterators provide core
+functionality like visibility label filtering and may not be removed by
+users. User level iterators are applied in the order of their priority.
+Priority is a user configured integer; iterators with lower numbers go first,
+passing the results of their iteration on to the other iterators up the
+stack.</p>
+
+<h3 id="setting-iterators-programmatically">Setting Iterators 
Programmatically</h3>
+
+<div class="language-java highlighter-rouge"><pre 
class="highlight"><code><span class="n">scanner</span><span 
class="o">.</span><span class="na">addIterator</span><span 
class="o">(</span><span class="k">new</span> <span 
class="n">IteratorSetting</span><span class="o">(</span>
+    <span class="mi">15</span><span class="o">,</span> <span class="c1">// 
priority</span>
+    <span class="s">"myiter"</span><span class="o">,</span> <span 
class="c1">// name this iterator</span>
+    <span class="s">"com.company.MyIterator"</span> <span class="c1">// class 
name</span>
+<span class="o">));</span>
+</code></pre>
+</div>
+
+<p>Some iterators take additional parameters from client code, as in the 
following
+example:</p>
+
+<div class="language-java highlighter-rouge"><pre 
class="highlight"><code><span class="n">IteratorSetting</span> <span 
class="n">iter</span> <span class="o">=</span> <span class="k">new</span> <span 
class="n">IteratorSetting</span><span class="o">(...);</span>
+<span class="n">iter</span><span class="o">.</span><span 
class="na">addOption</span><span class="o">(</span><span 
class="s">"myoptionname"</span><span class="o">,</span> <span 
class="s">"myoptionvalue"</span><span class="o">);</span>
+<span class="n">scanner</span><span class="o">.</span><span 
class="na">addIterator</span><span class="o">(</span><span 
class="n">iter</span><span class="o">);</span>
+</code></pre>
+</div>
+
+<p>Tables support separate Iterator settings to be applied at scan time, upon 
minor
+compaction and upon major compaction. For most uses, tables will have identical
+iterator settings for all three to avoid inconsistent results.</p>
+
+<h3 id="versioning-iterators-and-timestamps">Versioning Iterators and 
Timestamps</h3>
+
+<p>Accumulo provides the capability to manage versioned data through the use of
+timestamps within the Key. If a timestamp is not specified in the key created 
by the
+client then the system will set the timestamp to the current time. Two keys 
with
+identical rowIDs and columns but different timestamps are considered two 
versions
+of the same key. If two inserts are made into Accumulo with the same rowID,
+column, and timestamp, then the behavior is non-deterministic.</p>
+
+<p>Timestamps are sorted in descending order, so the most recent data comes 
first.
+Accumulo can be configured to return the top k versions, or versions later 
than a
+given date. The default is to return the one most recent version.</p>
+
+<p>The version policy can be changed by changing the VersioningIterator 
options for a
+table as follows:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>user@myinstance 
mytable&gt; config -t mytable -s table.iterator.scan.vers.opt.maxVersions=3
+
+user@myinstance mytable&gt; config -t mytable -s 
table.iterator.minc.vers.opt.maxVersions=3
+
+user@myinstance mytable&gt; config -t mytable -s 
table.iterator.majc.vers.opt.maxVersions=3
+</code></pre>
+</div>
+
+<p>When a table is created, by default it is configured to use the
+VersioningIterator and keep one version. A table can be created without the
+VersioningIterator with the -ndi option in the shell. Also the Java API
+has the following method:</p>
+
+<div class="language-java highlighter-rouge"><pre 
class="highlight"><code><span class="n">connector</span><span 
class="o">.</span><span class="na">tableOperations</span><span 
class="o">().</span><span class="na">create</span><span class="o">(</span><span 
class="n">String</span> <span class="n">tableName</span><span 
class="o">,</span> <span class="kt">boolean</span> <span 
class="n">limitVersion</span><span class="o">);</span>
+</code></pre>
+</div>
+
+<h4 id="logical-time">Logical Time</h4>
+
+<p>Accumulo 1.2 introduces the concept of logical time. This ensures that 
timestamps
+set by Accumulo always move forward. This helps avoid problems caused by
+TabletServers that have different time settings. The per tablet counter gives 
unique
+one up time stamps on a per mutation basis. When using time in milliseconds, if
+two things arrive within the same millisecond then both receive the same
+timestamp. When using time in milliseconds, Accumulo set times will still
+always move forward and never backwards.</p>
+
+<p>A table can be configured to use logical timestamps at creation time as 
follows:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>user@myinstance&gt; createtable -tl logical
+</code></pre>
+</div>
+
+<h4 id="deletes">Deletes</h4>
+
+<p>Deletes are special keys in Accumulo that get sorted along with all the 
other data.
+When a delete key is inserted, Accumulo will not show anything that has a
+timestamp less than or equal to the delete key. During major compaction, any 
keys
+older than a delete key are omitted from the new file created, and the omitted 
keys
+are removed from disk as part of the regular garbage collection process.</p>
+
+<h3 id="filters">Filters</h3>
+
+<p>When scanning over a set of key-value pairs it is possible to apply an 
arbitrary
+filtering policy through the use of a Filter. Filters are types of iterators 
that return
+only key-value pairs that satisfy the filter logic. Accumulo has a few 
built-in filters
+that can be configured on any table: AgeOff, ColumnAgeOff, Timestamp, NoVis, 
and RegEx. More can be added
+by writing a Java class that extends the
+<code 
class="highlighter-rouge">org.apache.accumulo.core.iterators.Filter</code> 
class.</p>
+
+<p>The AgeOff filter can be configured to remove data older than a certain 
date or a fixed
+amount of time from the present. The following example sets a table to delete
+everything inserted over 30 seconds ago:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>user@myinstance&gt; createtable filtertest
+
+user@myinstance filtertest&gt; setiter -t filtertest -scan -minc -majc -p 10 
-n myfilter -ageoff
+AgeOffFilter removes entries with timestamps more than &lt;ttl&gt; 
milliseconds old
+----------&gt; set org.apache.accumulo.core.iterators.user.AgeOffFilter 
parameter negate, default false
+                keeps k/v that pass accept method, true rejects k/v that pass 
accept method:
+----------&gt; set org.apache.accumulo.core.iterators.user.AgeOffFilter 
parameter ttl, time to
+                live (milliseconds): 30000
+----------&gt; set org.apache.accumulo.core.iterators.user.AgeOffFilter 
parameter currentTime, if set,
+                use the given value as the absolute time in milliseconds as 
the current time of day:
+
+user@myinstance filtertest&gt;
+
+user@myinstance filtertest&gt; scan
+
+user@myinstance filtertest&gt; insert foo a b c
+
+user@myinstance filtertest&gt; scan
+foo a:b [] c
+
+user@myinstance filtertest&gt; sleep 4
+
+user@myinstance filtertest&gt; scan
+
+user@myinstance filtertest&gt;
+</code></pre>
+</div>
+
+<p>To see the iterator settings for a table, use:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>user@example 
filtertest&gt; config -t filtertest -f iterator
+---------+---------------------------------------------+------------------
+SCOPE    | NAME                                        | VALUE
+---------+---------------------------------------------+------------------
+table    | table.iterator.majc.myfilter .............. | 
10,org.apache.accumulo.core.iterators.user.AgeOffFilter
+table    | table.iterator.majc.myfilter.opt.ttl ...... | 30000
+table    | table.iterator.majc.vers .................. | 
20,org.apache.accumulo.core.iterators.VersioningIterator
+table    | table.iterator.majc.vers.opt.maxVersions .. | 1
+table    | table.iterator.minc.myfilter .............. | 
10,org.apache.accumulo.core.iterators.user.AgeOffFilter
+table    | table.iterator.minc.myfilter.opt.ttl ...... | 30000
+table    | table.iterator.minc.vers .................. | 
20,org.apache.accumulo.core.iterators.VersioningIterator
+table    | table.iterator.minc.vers.opt.maxVersions .. | 1
+table    | table.iterator.scan.myfilter .............. | 
10,org.apache.accumulo.core.iterators.user.AgeOffFilter
+table    | table.iterator.scan.myfilter.opt.ttl ...... | 30000
+table    | table.iterator.scan.vers .................. | 
20,org.apache.accumulo.core.iterators.VersioningIterator
+table    | table.iterator.scan.vers.opt.maxVersions .. | 1
+---------+---------------------------------------------+------------------
+</code></pre>
+</div>
+
+<h3 id="combiners">Combiners</h3>
+
+<p>Accumulo supports on the fly lazy aggregation of data using Combiners. 
Aggregation is
+done at compaction and scan time. No lookup is done at insert time, which 
greatly
+speeds up ingest.</p>
+
+<p>Accumulo allows Combiners to be configured on tables and column
+families. When a Combiner is set it is applied across the values
+associated with any keys that share rowID, column family, and column qualifier.
+This is similar to the reduce step in MapReduce, which applies some function 
to all
+the values associated with a particular key.</p>
+
+<p>For example, if a summing combiner were configured on a table and the 
following
+mutations were inserted:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>Row     Family 
Qualifier Timestamp  Value
+rowID1  colfA  colqA     20100101   1
+rowID1  colfA  colqA     20100102   1
+</code></pre>
+</div>
+
+<p>The table would reflect only one aggregate value:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>rowID1  colfA  
colqA     -          2
+</code></pre>
+</div>
+
+<p>Combiners can be enabled for a table using the setiter command in the 
shell. Below is an example.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>root@a14 
perDayCounts&gt; setiter -t perDayCounts -p 10 -scan -minc -majc -n daycount
+                       -class 
org.apache.accumulo.core.iterators.user.SummingCombiner
+TypedValueCombiner can interpret Values as a variety of number encodings
+  (VLong, Long, or String) before combining
+----------&gt; set SummingCombiner parameter columns,
+            &lt;col fam&gt;[:&lt;col qual&gt;]{,&lt;col fam&gt;[:&lt;col 
qual&gt;]} : day
+----------&gt; set SummingCombiner parameter type, &lt;VARNUM|LONG|STRING&gt;: 
STRING
+
+root@a14 perDayCounts&gt; insert foo day 20080101 1
+root@a14 perDayCounts&gt; insert foo day 20080101 1
+root@a14 perDayCounts&gt; insert foo day 20080103 1
+root@a14 perDayCounts&gt; insert bar day 20080101 1
+root@a14 perDayCounts&gt; insert bar day 20080101 1
+
+root@a14 perDayCounts&gt; scan
+bar day:20080101 []    2
+foo day:20080101 []    2
+foo day:20080103 []    1
+</code></pre>
+</div>
+
+<p>Accumulo includes some useful Combiners out of the box. To find these look 
in
+the <em><code 
class="highlighter-rouge">org.apache.accumulo.core.iterators.user</code></em> 
package.</p>
+
+<p>Additional Combiners can be added by creating a Java class that extends
+<code 
class="highlighter-rouge">org.apache.accumulo.core.iterators.Combiner</code> 
and adding a jar containing that
+class to Accumulo&rsquo;s lib/ext directory.</p>
+
+<p>See the <a 
href="https://github.com/apache/accumulo-examples/blob/master/docs/combiner.md">combiner
 example</a>
+for example code.</p>
+
+<h2 id="block-cache">Block Cache</h2>
+
+<p>In order to increase throughput of commonly accessed entries, Accumulo 
employs a block cache.
+This block cache buffers data in memory so that it doesn&rsquo;t have to be read 
off of disk.
+The RFile format that Accumulo prefers is a mix of index blocks and data 
blocks, where the index blocks are used to find the appropriate data blocks.
+Typical queries to Accumulo result in a binary search over several index 
blocks followed by a linear scan of one or more data blocks.</p>
+
+<p>The block cache can be configured on a per-table basis, and all tablets 
hosted on a tablet server share a single resource pool.
+To configure the size of the tablet server&rsquo;s block cache, set the following 
properties:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>tserver.cache.data.size: Specifies the size of the 
cache for file data blocks.
+tserver.cache.index.size: Specifies the size of the cache for file indices.
+</code></pre>
+</div>
+
+<p>To enable the block cache for your table, set the following properties:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>table.cache.block.enable: Determines whether file 
(data) block cache is enabled.
+table.cache.index.enable: Determines whether index cache is enabled.
+</code></pre>
+</div>
+
+<p>The block cache can have a significant effect on alleviating hot spots, as 
well as reducing query latency.
+It is enabled by default for the metadata tables.</p>
+
+<h2 id="compaction">Compaction</h2>
+
+<p>As data is written to Accumulo it is buffered in memory. The data buffered 
in
+memory is eventually written to HDFS on a per tablet basis. Files can also be
+added to tablets directly by bulk import. In the background tablet servers run
+major compactions to merge multiple files into one. The tablet server has to
+decide which tablets to compact and which files within a tablet to compact.
+This decision is made using the compaction ratio, which is configurable on a
+per table basis. To configure this ratio modify the following property:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>table.compaction.major.ratio
+</code></pre>
+</div>
+
+<p>Increasing this ratio will result in more files per tablet and less 
compaction
+work. More files per tablet means higher query latency. So adjusting
+this ratio is a trade off between ingest and query performance. The ratio
+defaults to 3.</p>
+
+<p>The way the ratio works is that a set of files is compacted into one file 
if the
+sum of the sizes of the files in the set is larger than the ratio multiplied by
+the size of the largest file in the set. If this is not true for the set of all
+files in a tablet, the largest file is removed from consideration, and the
+remaining files are considered for compaction. This is repeated until a
+compaction is triggered or there are no files left to consider.</p>
+
+<p>The number of background threads tablet servers use to run major 
compactions is
+configurable. To configure this modify the following property:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>tserver.compaction.major.concurrent.max
+</code></pre>
+</div>
+
+<p>Also, the number of threads tablet servers use for minor compactions is
+configurable. To configure this modify the following property:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>tserver.compaction.minor.concurrent.max
+</code></pre>
+</div>
+
+<p>The numbers of minor and major compactions running and queued are visible on 
the
+Accumulo monitor page. This allows you to see if compactions are backing up
+and adjustments to the above settings are needed. When adjusting the number of
+threads available for compactions, consider the number of cores and other tasks
+running on the nodes such as maps and reduces.</p>
+
+<p>If major compactions are not keeping up, then the number of files per tablet
+will grow to a point such that query performance starts to suffer. One way to
+handle this situation is to increase the compaction ratio. For example, if the
+compaction ratio were set to 1, then every new file added to a tablet by minor
+compaction would immediately queue the tablet for major compaction. So if a
+tablet has a 200M file and minor compaction writes a 1M file, then the major
+compaction will attempt to merge the 200M and 1M file. If the tablet server
+has lots of tablets trying to do this sort of thing, then major compactions
+will back up and the number of files per tablet will start to grow, assuming
+data is being continuously written. Increasing the compaction ratio will
+alleviate backups by lowering the amount of major compaction work that needs to
+be done.</p>
+
+<p>Another option to deal with the files per tablet growing too large is to 
adjust
+the following property:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>table.file.max
+</code></pre>
+</div>
+
+<p>When a tablet reaches this number of files and needs to flush its in-memory
+data to disk, it will choose to do a merging minor compaction. A merging minor
+compaction will merge the tablet&rsquo;s smallest file with the data in memory at
+minor compaction time. Therefore the number of files will not grow beyond this
+limit. This will make minor compactions take longer, which will cause ingest
+performance to decrease. This can cause ingest to slow down until major
+compactions have enough time to catch up. When adjusting this property, also
+consider adjusting the compaction ratio. Ideally, merging minor compactions
+never need to occur and major compactions will keep up. It is possible to
+configure the file max and compaction ratio such that only merging minor
+compactions occur and major compactions never occur. This should be avoided
+because doing only merging minor compactions causes O(<em>N</em><sup>2</sup>) work to 
be done.
+The amount of work done by major compactions is 
O(<em>N</em>*log<sub><em>R</em></sub>(<em>N</em>)) where
+<em>R</em> is the compaction ratio.</p>
+
+<p>Compactions can be initiated manually for a table. To initiate a minor
+compaction, use the flush command in the shell. To initiate a major compaction,
+use the compact command in the shell. The compact command will compact all
+tablets in a table to one file. Even tablets with one file are compacted. This
+is useful for the case where a major compaction filter is configured for a
+table. In 1.4 the ability to compact a range of a table was added. To use this
+feature specify start and stop rows for the compact command. This will only
+compact tablets that overlap the given row range.</p>
+
+<h3 id="compaction-strategies">Compaction Strategies</h3>
+
+<p>The default behavior of major compactions is defined in the class 
DefaultCompactionStrategy. 
+This behavior can be changed by overriding the following property with a fully 
qualified class name:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>table.majc.compaction.strategy
+</code></pre>
+</div>
+
+<p>Custom compaction strategies can have additional properties that are 
specified following the prefix property:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>table.majc.compaction.strategy.opts.*
+</code></pre>
+</div>
+
+<p>Accumulo provides a few classes that can be used as an alternative 
compaction strategy. These classes are located in the 
+org.apache.accumulo.tserver.compaction.* package. EverythingCompactionStrategy 
will simply compact all files. This is the 
+strategy used by the user &ldquo;compact&rdquo; command. SizeLimitCompactionStrategy 
compacts files no bigger than the limit set in the
+property table.majc.compaction.strategy.opts.sizeLimit.</p>
+
+<p>TwoTierCompactionStrategy is a hybrid compaction strategy that supports two 
types of compression. If the total size of 
+files being compacted is larger than 
table.majc.compaction.strategy.opts.file.large.compress.threshold then a larger 
+compression type will be used. The larger compression type is specified in 
table.majc.compaction.strategy.opts.file.large.compress.type. 
+Otherwise, the configured table compression will be used. To use this strategy 
with minor compactions set table.file.compress.type=snappy 
+and set a different compress type in 
table.majc.compaction.strategy.opts.file.large.compress.type for larger 
files.</p>
+
+<h2 id="pre-splitting-tables">Pre-splitting tables</h2>
+
+<p>Accumulo will balance and distribute tables across servers. Before a
+table gets large, it will be maintained as a single tablet on a single
+server. This limits the speed at which data can be added or queried
+to the speed of a single node. To improve performance when a table
+is new, or small, you can add split points and generate new tablets.</p>
+
+<p>In the shell:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>root@myinstance&gt; createtable newTable
+root@myinstance&gt; addsplits -t newTable g n t
+</code></pre>
+</div>
+
+<p>This will create a new table with 4 tablets. The table will be split
+on the letters <code class="highlighter-rouge">g</code>, <code class="highlighter-rouge">n</code>, and 
<code class="highlighter-rouge">t</code> which will work nicely if the
+row data start with lower-case alphabetic characters. If your row
+data includes binary information or numeric information, or if the
+distribution of the row information is not flat, then you would pick
+different split points. Now ingest and query can proceed on 4 nodes
+which can improve performance.</p>
+
+<h2 id="merging-tablets">Merging tablets</h2>
+
+<p>Over time, a table can get very large, so large that it has hundreds
+of thousands of split points. Once there are enough tablets to spread
+a table across the entire cluster, additional splits may not improve
+performance, and may create unnecessary bookkeeping. The distribution
+of data may change over time. For example, if row data contains date
+information, and data is continually added and removed to maintain a
+window of current information, tablets for older rows may be empty.</p>
+
+<p>Accumulo supports tablet merging, which can be used to reduce
+the number of split points. The following command will merge all rows
+from <code class="highlighter-rouge">A</code> to <code class="highlighter-rouge">Z</code> into a single 
tablet:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>root@myinstance&gt; merge -t myTable -s A -e Z
+</code></pre>
+</div>
+
+<p>If the result of a merge produces a tablet that is larger than the
+configured split size, the tablet may be split by the tablet server.
+Be sure to increase your tablet size prior to any merges if the goal
+is to have larger tablets:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>root@myinstance&gt; config -t myTable -s 
table.split.threshold=2G
+</code></pre>
+</div>
+
+<p>In order to merge small tablets, you can ask Accumulo to merge
+sections of a table smaller than a given size.</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>root@myinstance&gt; merge -t myTable -s 100M
+</code></pre>
+</div>
+
+<p>By default, small tablets will not be merged into tablets that are
+already larger than the given size. This can leave isolated small
+tablets. To force small tablets to be merged into larger tablets use
+the <code class="highlighter-rouge">--force</code> option:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>root@myinstance&gt; merge -t myTable -s 100M --force
+</code></pre>
+</div>
+
+<p>Merging away small tablets works on one section at a time. If your
+table contains many sections of small split points, or you are
+attempting to change the split size of the entire table, it will be
+faster to set the split point and merge the entire table:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>root@myinstance&gt; config -t myTable -s 
table.split.threshold=256M
+root@myinstance&gt; merge -t myTable
+</code></pre>
+</div>
+
+<h2 id="delete-range">Delete Range</h2>
+
+<p>Consider an indexing scheme that uses date information in each row.
+For example <code class="highlighter-rouge">20110823-15:20:25.013</code> might be a row that specifies a
+date and time. In some cases, we might like to delete rows based on
+this date, say to remove all the data older than the current year.
+Accumulo supports a delete range operation which efficiently
+removes data between two rows. For example:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>root@myinstance&gt; deleterange -t myTable -s 2010 -e 
2011
+</code></pre>
+</div>
+
+<p>This will delete all rows starting with <code 
class="highlighter-rouge">2010</code> and it will stop at
+any row starting with <code class="highlighter-rouge">2011</code>. You can delete any data prior to 2011
+with:</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>root@myinstance&gt; deleterange -t myTable -e 2011 
--force
+</code></pre>
+</div>
+
+<p>The shell will not allow you to delete an unbounded range (no start)
+unless you provide the <code class="highlighter-rouge">--force</code> 
option.</p>
+
+<p>Range deletion is implemented using splits at the given start/end
+positions, and will affect the number of splits in the table.</p>
+
+<h2 id="cloning-tables">Cloning Tables</h2>
+
+<p>A new table can be created that points to an existing table&rsquo;s data. This 
is a
+very quick metadata operation, no data is actually copied. The cloned table
+and the source table can change independently after the clone operation. One
+use case for this feature is testing. For example to test a new filtering
+iterator, clone the table, add the filter to the clone, and force a major
+compaction. To perform a test on less data, clone a table and then use delete
+range to efficiently remove a lot of data from the clone. Another use case is
+generating a snapshot to guard against human error. To create a snapshot,
+clone a table and then disable write permissions on the clone.</p>
+
+<p>The clone operation will point to the source table&rsquo;s files. This is why 
the
+flush option is present and is enabled by default in the shell. If the flush
+option is not enabled, then any data the source table currently has in memory
+will not exist in the clone.</p>
+
+<p>A cloned table copies the configuration of the source table. However the
+permissions of the source table are not copied to the clone. After a clone is
+created, only the user that created the clone can read and write to it.</p>
+
+<p>In the following example we see that data inserted after the clone 
operation is
+not visible in the clone.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>root@a14&gt; 
createtable people
+
+root@a14 people&gt; insert 890435 name last Doe
+root@a14 people&gt; insert 890435 name first John
+
+root@a14 people&gt; clonetable people test
+
+root@a14 people&gt; insert 890436 name first Jane
+root@a14 people&gt; insert 890436 name last Doe
+
+root@a14 people&gt; scan
+890435 name:first []    John
+890435 name:last []    Doe
+890436 name:first []    Jane
+890436 name:last []    Doe
+
+root@a14 people&gt; table test
+
+root@a14 test&gt; scan
+890435 name:first []    John
+890435 name:last []    Doe
+
+root@a14 test&gt;
+</code></pre>
+</div>
+
+<p>The du command in the shell shows how much space a table is using in HDFS.
+This command can also show how much overlapping space two cloned tables have in
+HDFS. In the example below du shows table ci is using 428M. Then ci is cloned
+to cic and du shows that both tables share 428M. After three entries are
+inserted into cic and it is flushed, du shows the two tables still share 428M but
+cic has 226 bytes to itself. Finally, table cic is compacted and then du shows
+that each table uses 428M.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>root@a14&gt; du ci
+             428,482,573 [ci]
+
+root@a14&gt; clonetable ci cic
+
+root@a14&gt; du ci cic
+             428,482,573 [ci, cic]
+
+root@a14&gt; table cic
+
+root@a14 cic&gt; insert r1 cf1 cq1 v1
+root@a14 cic&gt; insert r1 cf1 cq2 v2
+root@a14 cic&gt; insert r1 cf1 cq3 v3
+
+root@a14 cic&gt; flush -t cic -w
+27 15:00:13,908 [shell.Shell] INFO : Flush of table cic completed.
+
+root@a14 cic&gt; du ci cic
+             428,482,573 [ci, cic]
+                     226 [cic]
+
+root@a14 cic&gt; compact -t cic -w
+27 15:00:35,871 [shell.Shell] INFO : Compacting table ...
+27 15:03:03,303 [shell.Shell] INFO : Compaction of table cic completed for 
given range
+
+root@a14 cic&gt; du ci cic
+             428,482,573 [ci]
+             428,482,612 [cic]
+
+root@a14 cic&gt;
+</code></pre>
+</div>
+
+<h2 id="exporting-tables">Exporting Tables</h2>
+
+<p>Accumulo supports exporting tables for the purpose of copying tables to 
another
+cluster. Exporting and importing tables preserves the table's configuration,
+splits, and logical time. Tables are exported and then copied via the hadoop
+distcp command. To export a table, it must be offline and stay offline while
+distcp runs. The reason it needs to stay offline is to prevent files from being
+deleted. A table can be cloned and the clone taken offline in order to avoid
+losing access to the table. See the <a 
href="https://github.com/apache/accumulo-examples/blob/master/docs/export.md">export
 example</a>
+for example code.</p>
+
+
+  </div>
+</div>
+
+        </div>
+
+        
+<footer>
+
+  <p><a href="https://www.apache.org/foundation/contributing"><img 
src="https://www.apache.org/images/SupportApache-small.png" alt="Support the 
ASF" id="asf-logo" height="100" /></a></p>
+
+  <p>Copyright &copy; 2011-2017 The Apache Software Foundation. Licensed under the 
<a href="https://www.apache.org/licenses/LICENSE-2.0">Apache&nbsp;License,&nbsp;
Version&nbsp;2.0</a>.</p>
+
+</footer>
+
+
+      </div>
+    </div>
+  </div>
+</body>
+</html>
[03/13] accumulo-website git commit: Jekyll build from master:7cc70b2

Reply via email to