http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/7b2eb317/docs/unreleased/development/sampling.html ---------------------------------------------------------------------- diff --git a/docs/unreleased/development/sampling.html b/docs/unreleased/development/sampling.html new file mode 100644 index 0000000..24dcdb8 --- /dev/null +++ b/docs/unreleased/development/sampling.html @@ -0,0 +1,400 @@ +<!DOCTYPE html> +<html lang="en"> +<head> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<meta charset="utf-8"> +<meta http-equiv="X-UA-Compatible" content="IE=edge"> +<meta name="viewport" content="width=device-width, initial-scale=1"> +<link href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.7/paper/bootstrap.min.css" rel="stylesheet" integrity="sha384-awusxf8AUojygHf2+joICySzB780jVvQaVCAt1clU3QsyAitLGul28Qxb2r1e5g+" crossorigin="anonymous"> +<link href="//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css" rel="stylesheet"> +<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.css"> +<link href="/css/accumulo.css" rel="stylesheet" type="text/css"> + +<title>Accumulo Documentation - Sampling</title> + +<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script> +<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script> +<script type="text/javascript" src="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.js"></script> +<script> + // show location of canonical site if not currently on the canonical site + $(function() { + var host = window.location.host; + if (typeof host !== 'undefined' && host !== 'accumulo.apache.org') { + $('#non-canonical').show(); + } + }); + + $(function() { + // decorate section headers with anchors + return $("h2, h3, h4, h5, h6").each(function(i, el) { + var $el, icon, id; + $el = $(el); + id = $el.attr('id'); + icon = '<i class="fa fa-link"></i>'; + if (id) { + return $el.append($("<a />").addClass("header-link").attr("href", "#" + id).html(icon)); + } + }); + }); + + // configure Google Analytics + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + 
})(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + if (ga.hasOwnProperty('loaded') && ga.loaded === true) { + ga('create', 'UA-50934829-1', 'apache.org'); + ga('send', 'pageview'); + } +</script> + +</head> +<body style="padding-top: 100px"> + + <nav class="navbar navbar-default navbar-fixed-top"> + <div class="container"> + <div class="navbar-header"> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target="#navbar-items"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a href="/"><img id="nav-logo" alt="Apache Accumulo" class="img-responsive" src="/images/accumulo-logo.png" width="200" + /></a> + </div> + <div class="collapse navbar-collapse" id="navbar-items"> + <ul class="nav navbar-nav"> + <li class="nav-link"><a href="/downloads">Download</a></li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Releases<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="/release/accumulo-1.8.1/">1.8.1 (Latest)</a></li> + <li><a href="/release/accumulo-1.7.3/">1.7.3</a></li> + <li><a href="/release/accumulo-1.6.6/">1.6.6</a></li> + <li><a href="/release/">Archive</a></li> + </ul> + </li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="/1.8/accumulo_user_manual.html">User Manual (1.8)</a></li> + <li><a href="/1.8/apidocs">Javadocs (1.8)</a></li> + <li><a href="/1.8/examples">Examples (1.8)</a></li> + <li><a href="/features">Features</a></li> + <li><a href="/glossary">Glossary</a></li> + <li><a href="/external-docs">External Docs</a></li> + <li><a href="/docs-archive/">Archive</a></li> + </ul> + </li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Community<span class="caret"></span></a> + 
<ul class="dropdown-menu"> + <li><a href="/get_involved">Get Involved</a></li> + <li><a href="/mailing_list">Mailing Lists</a></li> + <li><a href="/people">People</a></li> + <li><a href="/related-projects">Related Projects</a></li> + <li><a href="/contributor/">Contributor Guide</a></li> + </ul> + </li> + </ul> + <ul class="nav navbar-nav navbar-right"> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Apache Software Foundation<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="https://www.apache.org">Apache Homepage <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/licenses/LICENSE-2.0">License <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/sponsorship">Sponsorship <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/security">Security <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/thanks">Thanks <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/policies/conduct">Code of Conduct <i class="fa fa-external-link"></i></a></li> + </ul> + </li> + </ul> + </div> + </div> +</nav> + + <div class="container"> + <div class="row"> + <div class="col-md-12"> + + <div id="non-canonical" style="display: none; background-color: #F0E68C; padding-left: 1em;"> + Visit the official site at: <a href="https://accumulo.apache.org">https://accumulo.apache.org</a> + </div> + <div id="content"> + + <div class="alert alert-danger" role="alert">This documentation is for an unreleased version of Apache Accumulo that is currently under development! 
Check out the <a href="/docs-1.8/">documentation for the latest release</a>.</div> + +<div class="row"> + <div class="col-md-3"> + <div class="panel-group" id="accordion" role="tablist" aria-multiselectable="true"> + <div class="panel panel-default"> + + + + + + + + + + <div class="panel-heading" role="tab" id="headingOne"> + <h4 class="panel-title"> + <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsegetting-started" aria-expanded="false" aria-controls="collapsegetting-started"> + Getting started + </a> + </h4> + </div> + <div id="collapsegetting-started" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne"> + <div class="panel-body"> + + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/design">Accumulo Design</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/clients">Accumulo Clients</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/shell">Accumulo Shell</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/table_design">Table Design</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/table_configuration">Table Configuration</a></div> + + </div> + </div> + + + + + + + + + + + + <div class="panel-heading" role="tab" id="headingOne"> + <h4 class="panel-title"> + <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsedevelopment" aria-expanded="true" aria-controls="collapsedevelopment"> + Development + </a> + </h4> + </div> + <div id="collapsedevelopment" class="panel-collapse collapse in" role="tabpanel" aria-labelledby="headingOne"> + <div class="panel-body"> + + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/iterator_design">Iterator Design</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/iterator_testing">Iterator Testing</a></div> + + <div 
class="row doc-sidebar-link"><a href="/docs/unreleased/development/development_tools">Development Tools</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/sampling">Sampling</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/summaries">Summary Statistics</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/security">Security</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/high_speed_ingest">High-Speed Ingest</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/analytics">Analytics</a></div> + + </div> + </div> + + + + + + + + + + + + <div class="panel-heading" role="tab" id="headingOne"> + <h4 class="panel-title"> + <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapseadministration" aria-expanded="false" aria-controls="collapseadministration"> + Administration + </a> + </h4> + </div> + <div id="collapseadministration" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne"> + <div class="panel-body"> + + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/overview">Overview</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/configuration-management">Configuration Management</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/configuration-properties">Configuration Properties</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/kerberos">Kerberos</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/replication">Replication</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/fate">FATE</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/multivolume">Multi-Volume Installations</a></div> + + <div class="row 
doc-sidebar-link"><a href="/docs/unreleased/administration/ssl">SSL</a></div> + + </div> + </div> + + + + + + + + + + + + + + + + + + + + + + <div class="panel-heading" role="tab" id="headingOne"> + <h4 class="panel-title"> + <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsetroubleshooting" aria-expanded="false" aria-controls="collapsetroubleshooting"> + Troubleshooting + </a> + </h4> + </div> + <div id="collapsetroubleshooting" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne"> + <div class="panel-body"> + + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/troubleshooting/overview">Overview</a></div> + + </div> + </div> + + + + </div> + </div> + </div> + <div class="col-md-9"> + + <p><a href="/docs/unreleased/">Accumulo unreleased docs</a> >> Development >> Sampling</p> + + + <h1>Sampling</h1> + + <h2 id="overview">Overview</h2> + +<p>Accumulo has the ability to generate and scan a per-table set of sample data. +This sample data is kept up to date as a table is mutated. Which key values are +placed in the sample data is configurable per table.</p> + +<p>This feature can be used for query estimation and optimization. For an example +of estimation, assume an Accumulo table is configured to generate a sample +containing one millionth of a table's data. If a query is executed against the +sample and returns one thousand results, then the same query against all the +data would probably return a billion results. A nice property of having +Accumulo generate the sample is that it's always up to date. 
So estimations +will be accurate even when querying the most recently written data.</p> + +<p>An example of a query optimization is an iterator using sample data to get an +estimate, and then making decisions based on the estimate.</p> + +<h2 id="configuring">Configuring</h2> + +<p>In order to use sampling, an Accumulo table must be configured with a class that +implements <code class="highlighter-rouge">org.apache.accumulo.core.sample.Sampler</code> along with options for +that class. For guidance on implementing a Sampler, see that interface's +javadoc. Accumulo provides a few implementations out of the box. For +information on how to use the samplers that ship with Accumulo, look in the +package <code class="highlighter-rouge">org.apache.accumulo.core.sample</code> and consult the javadoc of the +classes there. See the <a href="https://github.com/apache/accumulo-examples/blob/master/docs/sample.md">sampling example</a> for examples of how to +configure a Sampler on a table.</p> + +<p>Once a table is configured with a sampler, all writes after that point will +generate sample data. For data written before sampling was configured, sample +data will not be present. A compaction can be initiated that only compacts the +files in the table that do not have sample data. The example readme shows how +to do this.</p> + +<p>If the sampling configuration of a table is changed, then Accumulo will start +generating new sample data with the new configuration. However, old data will +still have sample data generated with the previous configuration. A selective +compaction can also be issued in this case to regenerate the sample data.</p> + +<h2 id="scanning-sample-data">Scanning sample data</h2> + +<p>In order to scan sample data, use the <code class="highlighter-rouge">setSamplerConfiguration(...)</code> method on +<code class="highlighter-rouge">Scanner</code> or <code class="highlighter-rouge">BatchScanner</code>. 
Please consult this method's javadoc for more +information.</p> + +<p>Sample data can also be scanned from within an Accumulo <code class="highlighter-rouge">SortedKeyValueIterator</code>. +To see how to do this, look at the example iterator referenced in the <a href="https://github.com/apache/accumulo-examples/blob/master/docs/sample.md">sampling example</a>. +Also, consult the javadoc on <code class="highlighter-rouge">org.apache.accumulo.core.iterators.IteratorEnvironment.cloneWithSamplingEnabled()</code>.</p> + +<p>MapReduce jobs using the <code class="highlighter-rouge">AccumuloInputFormat</code> can also read sample data. See +the javadoc for the <code class="highlighter-rouge">setSamplerConfiguration()</code> method on +<code class="highlighter-rouge">AccumuloInputFormat</code>.</p> + +<p>Scans over sample data will throw a <code class="highlighter-rouge">SampleNotPresentException</code> in the following cases:</p> + +<ol> + <li>sample data is not present</li> + <li>sample data is present but was generated with multiple configurations</li> + <li>sample data is partially present</li> +</ol> + +<p>So a scan over sample data can only succeed if all data written has sample data +generated with the same configuration.</p> + +<h2 id="bulk-import">Bulk import</h2> + +<p>When generating rfiles to bulk import into Accumulo, those rfiles can contain +sample data. To use this feature, look at the javadoc on the +<code class="highlighter-rouge">AccumuloFileOutputFormat.setSampler(...)</code> method.</p> + + + </div> +</div> + + </div> + + +<footer> + + <p><a href="https://www.apache.org/foundation/contributing"><img src="https://www.apache.org/images/SupportApache-small.png" alt="Support the ASF" id="asf-logo" height="100" /></a></p> + + <p>Copyright © 2011-2017 The Apache Software Foundation. Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.</p> + +</footer> + + + </div> + </div> + </div> +</body> +</html>
http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/7b2eb317/docs/unreleased/development/security.html ---------------------------------------------------------------------- diff --git a/docs/unreleased/development/security.html b/docs/unreleased/development/security.html new file mode 100644 index 0000000..1f61c21 --- /dev/null +++ b/docs/unreleased/development/security.html @@ -0,0 +1,497 @@ +<!DOCTYPE html> +<html lang="en"> +<head> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<meta charset="utf-8"> +<meta http-equiv="X-UA-Compatible" content="IE=edge"> +<meta name="viewport" content="width=device-width, initial-scale=1"> +<link href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.7/paper/bootstrap.min.css" rel="stylesheet" integrity="sha384-awusxf8AUojygHf2+joICySzB780jVvQaVCAt1clU3QsyAitLGul28Qxb2r1e5g+" crossorigin="anonymous"> +<link href="//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css" rel="stylesheet"> +<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.css"> +<link href="/css/accumulo.css" rel="stylesheet" type="text/css"> + +<title>Accumulo Documentation - Security</title> + +<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script> +<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script> +<script type="text/javascript" src="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.js"></script> +<script> + // show location of canonical site if not currently on the canonical site + $(function() { + var host = window.location.host; + if (typeof host !== 'undefined' && host !== 'accumulo.apache.org') { + $('#non-canonical').show(); + } + }); + + $(function() { + // decorate section headers with anchors + return $("h2, h3, h4, h5, h6").each(function(i, el) { + var $el, icon, id; + $el = $(el); + id = $el.attr('id'); + icon = '<i class="fa fa-link"></i>'; + if (id) { + return $el.append($("<a />").addClass("header-link").attr("href", "#" + id).html(icon)); + } + }); + }); + + // configure Google Analytics + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + 
})(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + if (ga.hasOwnProperty('loaded') && ga.loaded === true) { + ga('create', 'UA-50934829-1', 'apache.org'); + ga('send', 'pageview'); + } +</script> + +</head> +<body style="padding-top: 100px"> + + <nav class="navbar navbar-default navbar-fixed-top"> + <div class="container"> + <div class="navbar-header"> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target="#navbar-items"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a href="/"><img id="nav-logo" alt="Apache Accumulo" class="img-responsive" src="/images/accumulo-logo.png" width="200" + /></a> + </div> + <div class="collapse navbar-collapse" id="navbar-items"> + <ul class="nav navbar-nav"> + <li class="nav-link"><a href="/downloads">Download</a></li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Releases<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="/release/accumulo-1.8.1/">1.8.1 (Latest)</a></li> + <li><a href="/release/accumulo-1.7.3/">1.7.3</a></li> + <li><a href="/release/accumulo-1.6.6/">1.6.6</a></li> + <li><a href="/release/">Archive</a></li> + </ul> + </li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="/1.8/accumulo_user_manual.html">User Manual (1.8)</a></li> + <li><a href="/1.8/apidocs">Javadocs (1.8)</a></li> + <li><a href="/1.8/examples">Examples (1.8)</a></li> + <li><a href="/features">Features</a></li> + <li><a href="/glossary">Glossary</a></li> + <li><a href="/external-docs">External Docs</a></li> + <li><a href="/docs-archive/">Archive</a></li> + </ul> + </li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Community<span class="caret"></span></a> + 
<ul class="dropdown-menu"> + <li><a href="/get_involved">Get Involved</a></li> + <li><a href="/mailing_list">Mailing Lists</a></li> + <li><a href="/people">People</a></li> + <li><a href="/related-projects">Related Projects</a></li> + <li><a href="/contributor/">Contributor Guide</a></li> + </ul> + </li> + </ul> + <ul class="nav navbar-nav navbar-right"> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Apache Software Foundation<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="https://www.apache.org">Apache Homepage <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/licenses/LICENSE-2.0">License <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/sponsorship">Sponsorship <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/security">Security <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/thanks">Thanks <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/policies/conduct">Code of Conduct <i class="fa fa-external-link"></i></a></li> + </ul> + </li> + </ul> + </div> + </div> +</nav> + + <div class="container"> + <div class="row"> + <div class="col-md-12"> + + <div id="non-canonical" style="display: none; background-color: #F0E68C; padding-left: 1em;"> + Visit the official site at: <a href="https://accumulo.apache.org">https://accumulo.apache.org</a> + </div> + <div id="content"> + + <div class="alert alert-danger" role="alert">This documentation is for an unreleased version of Apache Accumulo that is currently under development! 
Check out the <a href="/docs-1.8/">documentation for the latest release</a>.</div> + +<div class="row"> + <div class="col-md-3"> + <div class="panel-group" id="accordion" role="tablist" aria-multiselectable="true"> + <div class="panel panel-default"> + + + + + + + + + + <div class="panel-heading" role="tab" id="headingOne"> + <h4 class="panel-title"> + <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsegetting-started" aria-expanded="false" aria-controls="collapsegetting-started"> + Getting started + </a> + </h4> + </div> + <div id="collapsegetting-started" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne"> + <div class="panel-body"> + + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/design">Accumulo Design</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/clients">Accumulo Clients</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/shell">Accumulo Shell</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/table_design">Table Design</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/table_configuration">Table Configuration</a></div> + + </div> + </div> + + + + + + + + + + + + <div class="panel-heading" role="tab" id="headingOne"> + <h4 class="panel-title"> + <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsedevelopment" aria-expanded="true" aria-controls="collapsedevelopment"> + Development + </a> + </h4> + </div> + <div id="collapsedevelopment" class="panel-collapse collapse in" role="tabpanel" aria-labelledby="headingOne"> + <div class="panel-body"> + + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/iterator_design">Iterator Design</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/iterator_testing">Iterator Testing</a></div> + + <div 
class="row doc-sidebar-link"><a href="/docs/unreleased/development/development_tools">Development Tools</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/sampling">Sampling</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/summaries">Summary Statistics</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/security">Security</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/high_speed_ingest">High-Speed Ingest</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/analytics">Analytics</a></div> + + </div> + </div> + + + + + + + + + + + + <div class="panel-heading" role="tab" id="headingOne"> + <h4 class="panel-title"> + <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapseadministration" aria-expanded="false" aria-controls="collapseadministration"> + Administration + </a> + </h4> + </div> + <div id="collapseadministration" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne"> + <div class="panel-body"> + + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/overview">Overview</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/configuration-management">Configuration Management</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/configuration-properties">Configuration Properties</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/kerberos">Kerberos</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/replication">Replication</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/fate">FATE</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/multivolume">Multi-Volume Installations</a></div> + + <div class="row 
doc-sidebar-link"><a href="/docs/unreleased/administration/ssl">SSL</a></div> + + </div> + </div> + + + + + + + + + + + + + + + + + + + + + + <div class="panel-heading" role="tab" id="headingOne"> + <h4 class="panel-title"> + <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsetroubleshooting" aria-expanded="false" aria-controls="collapsetroubleshooting"> + Troubleshooting + </a> + </h4> + </div> + <div id="collapsetroubleshooting" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne"> + <div class="panel-body"> + + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/troubleshooting/overview">Overview</a></div> + + </div> + </div> + + + + </div> + </div> + </div> + <div class="col-md-9"> + + <p><a href="/docs/unreleased/">Accumulo unreleased docs</a> >> Development >> Security</p> + + + <h1>Security</h1> + + <p>Accumulo extends the BigTable data model to implement a security mechanism +known as cell-level security. Every key-value pair has its own security label, stored +under the column visibility element of the key, which is used to determine whether +a given user meets the security requirements to read the value. This enables data of +various security levels to be stored within the same row, and users of varying +degrees of access to query the same table, while preserving data confidentiality.</p> + +<h2 id="security-label-expressions">Security Label Expressions</h2> + +<p>When mutations are applied, users can specify a security label for each value. 
This is +done as the Mutation is created by passing a ColumnVisibility object to the put() +method:</p> + +<div class="language-java highlighter-rouge"><pre class="highlight"><code><span class="n">Text</span> <span class="n">rowID</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Text</span><span class="o">(</span><span class="s">"row1"</span><span class="o">);</span> +<span class="n">Text</span> <span class="n">colFam</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Text</span><span class="o">(</span><span class="s">"myColFam"</span><span class="o">);</span> +<span class="n">Text</span> <span class="n">colQual</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Text</span><span class="o">(</span><span class="s">"myColQual"</span><span class="o">);</span> +<span class="n">ColumnVisibility</span> <span class="n">colVis</span> <span class="o">=</span> <span class="k">new</span> <span class="n">ColumnVisibility</span><span class="o">(</span><span class="s">"public"</span><span class="o">);</span> +<span class="kt">long</span> <span class="n">timestamp</span> <span class="o">=</span> <span class="n">System</span><span class="o">.</span><span class="na">currentTimeMillis</span><span class="o">();</span> + +<span class="n">Value</span> <span class="n">value</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Value</span><span class="o">(</span><span class="s">"myValue"</span><span class="o">);</span> + +<span class="n">Mutation</span> <span class="n">mutation</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Mutation</span><span class="o">(</span><span class="n">rowID</span><span class="o">);</span> +<span class="n">mutation</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">colFam</span><span class="o">,</span> <span class="n">colQual</span><span class="o">,</span> <span class="n">colVis</span><span 
class="o">,</span> <span class="n">timestamp</span><span class="o">,</span> <span class="n">value</span><span class="o">);</span> +</code></pre> +</div> + +<h2 id="security-label-expression-syntax">Security Label Expression Syntax</h2> + +<p>Security labels consist of a set of user-defined tokens that are required to read the +value the label is associated with. The set of tokens required can be specified using +syntax that supports logical AND <code class="highlighter-rouge">&</code> and OR <code class="highlighter-rouge">|</code> combinations of terms, as +well as nesting groups <code class="highlighter-rouge">()</code> of terms together.</p> + +<p>Each term is comprised of one to many alpha-numeric characters, hyphens, underscores or +periods. Optionally, each term may be wrapped in quotation marks +which removes the restriction on valid characters. In quoted terms, quotation marks +and backslash characters can be used as characters in the term by escaping them +with a backslash.</p> + +<p>For example, suppose within our organization we want to label our data values with +security labels defined in terms of user roles. 
We might have tokens such as:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>admin +audit +system +</code></pre> +</div> + +<p>These can be specified alone or combined using logical operators:</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>// Users must have admin privileges +admin + +// Users must have admin and audit privileges +admin&audit + +// Users with either admin or audit privileges +admin|audit + +// Users must have audit and one or both of admin or system +(admin|system)&audit +</code></pre> +</div> + +<p>When both <code class="highlighter-rouge">|</code> and <code class="highlighter-rouge">&</code> operators are used, parentheses must be used to specify +precedence of the operators.</p> + +<h2 id="authorization">Authorization</h2> + +<p>When clients attempt to read data from Accumulo, any security labels present are +examined against the set of authorizations passed by the client code when the +Scanner or BatchScanner are created. If the authorizations are determined to be +insufficient to satisfy the security label, the value is suppressed from the set of +results sent back to the client.</p> + +<p>Authorizations are specified as a comma-separated list of tokens the user possesses:</p> + +<div class="language-java highlighter-rouge"><pre class="highlight"><code><span class="c1">// user possesses both admin and system level access</span> +<span class="n">Authorizations</span> <span class="n">auths</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Authorizations</span><span class="o">(</span><span class="s">"admin"</span><span class="o">,</span><span class="s">"system"</span><span class="o">);</span> + +<span class="n">Scanner</span> <span class="n">s</span> <span class="o">=</span> <span class="n">connector</span><span class="o">.</span><span class="na">createScanner</span><span class="o">(</span><span class="s">"table"</span><span class="o">,</span> <span class="n">auths</span><span 
class="o">);</span> +</code></pre> +</div> + +<h2 id="user-authorizations">User Authorizations</h2> + +<p>Each Accumulo user has a set of associated security labels. To manipulate +these in the shell while using the default authorizor, use the setauths and getauths commands. +These may also be modified for the default authorizor using the java security operations API.</p> + +<p>When a user creates a scanner a set of Authorizations is passed. If the +authorizations passed to the scanner are not a subset of the user's +authorizations, then an exception will be thrown.</p> + +<p>To prevent users from writing data they can not read, add the visibility +constraint to a table. Use the -evc option in the createtable shell command to +enable this constraint. For existing tables use the following shell command to +enable the visibility constraint. Ensure the constraint number does not +conflict with any existing constraints.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>config -t table -s table.constraint.1=org.apache.accumulo.core.security.VisibilityConstraint +</code></pre> +</div> + +<p>Any user with the alter table permission can add or remove this constraint. +This constraint is not applied to bulk imported data, if this is a concern then +disable the bulk import permission.</p> + +<h2 id="pluggable-security">Pluggable Security</h2> + +<p>New in 1.5 of Accumulo is a pluggable security mechanism. It can be broken into three actions — +authentication, authorization, and permission handling. By default all of these are handled in +Zookeeper, which is how things were handled in Accumulo 1.4 and before. It is worth noting at this +point, that it is a new feature in 1.5 and may be adjusted in future releases without the standard +deprecation cycle.</p> + +<p>Authentication simply handles the ability for a user to verify their identity. A combination of +principal and authentication token are used to verify a user is who they say they are. 
An +authentication token can be constructed directly through its constructor, but it is +advised to use the <code class="highlighter-rouge">init(Property)</code> method to populate an authentication token. It is expected that a +user knows what the appropriate token to use for their system is. The default token is +<code class="highlighter-rouge">PasswordToken</code>.</p> + +<p>Once a user is authenticated by the Authenticator, the user has access to the other actions within +Accumulo. All actions in Accumulo are ACLed, and this ACL check is handled by the Permission +Handler. This is what manages all of the permissions, which are divided into system and per-table +levels. From there, if a user is doing an action which requires authorizations, the Authorizor is +queried to determine what authorizations the user has.</p> + +<p>This setup allows a variety of different mechanisms to be used for handling different aspects of +Accumulo's security. A system like Kerberos can be used for authentication, then a system like LDAP +could be used to determine if a user has a specific permission, and then it may default back to the +default ZookeeperAuthorizor to determine what Authorizations a user is ultimately allowed to use. +This is a pluggable system so custom components can be created depending on your need.</p> + +<h2 id="secure-authorizations-handling">Secure Authorizations Handling</h2> + +<p>For applications serving many users, it is not expected that an Accumulo user +will be created for each application user. In this case an Accumulo user with +all authorizations needed by any of the application's users must be created. To +service queries, the application should create a scanner with the application +user's authorizations. 
These authorizations could be obtained from a trusted 3rd +party.</p> + +<p>Often production systems will integrate with Public-Key Infrastructure (PKI) and +designate client code within the query layer to negotiate with PKI servers in order +to authenticate users and retrieve their authorization tokens (credentials). This +requires users to specify only the information necessary to authenticate themselves +to the system. Once user identity is established, their credentials can be accessed by +the client code and passed to Accumulo outside of the reach of the user.</p> + +<h2 id="query-services-layer">Query Services Layer</h2> + +<p>Since the primary method of interaction with Accumulo is through the Java API, +production environments often call for the implementation of a Query layer. This +can be done using web services in containers such as Apache Tomcat, but is not a +requirement. The Query Services Layer provides a +platform on which user-facing applications can be built. This allows the application +designers to isolate potentially complex query logic, and enables a convenient point +at which to perform essential security functions.</p> + +<p>Several production environments choose to implement authentication at this layer, +where user identifiers are used to retrieve their access credentials which are then +cached within the query layer and presented to Accumulo through the +Authorizations mechanism.</p> + +<p>Typically, the query services layer sits between Accumulo and user workstations.</p> + + </div> +</div> + + </div> + + +<footer> + + <p><a href="https://www.apache.org/foundation/contributing"><img src="https://www.apache.org/images/SupportApache-small.png" alt="Support the ASF" id="asf-logo" height="100" /></a></p> + + <p>Copyright © 2011-2017 The Apache Software Foundation. 
Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.</p> + +</footer> + + + </div> + </div> + </div> +</body> +</html> http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/7b2eb317/docs/unreleased/development/summaries.html ---------------------------------------------------------------------- diff --git a/docs/unreleased/development/summaries.html b/docs/unreleased/development/summaries.html new file mode 100644 index 0000000..d8adda8 --- /dev/null +++ b/docs/unreleased/development/summaries.html @@ -0,0 +1,554 @@ +<!DOCTYPE html> +<html lang="en"> +<head> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<meta charset="utf-8"> +<meta http-equiv="X-UA-Compatible" content="IE=edge"> +<meta name="viewport" content="width=device-width, initial-scale=1"> +<link href="https://maxcdn.bootstrapcdn.com/bootswatch/3.3.7/paper/bootstrap.min.css" rel="stylesheet" integrity="sha384-awusxf8AUojygHf2+joICySzB780jVvQaVCAt1clU3QsyAitLGul28Qxb2r1e5g+" crossorigin="anonymous"> +<link href="//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css" rel="stylesheet"> +<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.css"> +<link href="/css/accumulo.css" rel="stylesheet" type="text/css"> + +<title>Accumulo Documentation - Summary Statistics</title> + +<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script> +<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script> +<script type="text/javascript" src="https://cdn.datatables.net/v/bs/jq-2.2.3/dt-1.10.12/datatables.min.js"></script> +<script> + // show location of canonical site if not currently on the canonical site + $(function() { + var host = window.location.host; + if (typeof host !== 'undefined' && host !== 'accumulo.apache.org') { + $('#non-canonical').show(); + } + }); + + $(function() { + // decorate section headers with anchors + return $("h2, h3, h4, h5, h6").each(function(i, el) { + var $el, icon, id; + $el = $(el); + id = $el.attr('id'); + icon = '<i class="fa fa-link"></i>'; + if (id) { + return $el.append($("<a />").addClass("header-link").attr("href", "#" + id).html(icon)); + } + }); + }); + + // configure Google Analytics + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) + 
})(window,document,'script','//www.google-analytics.com/analytics.js','ga'); + + if (ga.hasOwnProperty('loaded') && ga.loaded === true) { + ga('create', 'UA-50934829-1', 'apache.org'); + ga('send', 'pageview'); + } +</script> + +</head> +<body style="padding-top: 100px"> + + <nav class="navbar navbar-default navbar-fixed-top"> + <div class="container"> + <div class="navbar-header"> + <button type="button" class="navbar-toggle" data-toggle="collapse" data-target="#navbar-items"> + <span class="sr-only">Toggle navigation</span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + <span class="icon-bar"></span> + </button> + <a href="/"><img id="nav-logo" alt="Apache Accumulo" class="img-responsive" src="/images/accumulo-logo.png" width="200" + /></a> + </div> + <div class="collapse navbar-collapse" id="navbar-items"> + <ul class="nav navbar-nav"> + <li class="nav-link"><a href="/downloads">Download</a></li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Releases<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="/release/accumulo-1.8.1/">1.8.1 (Latest)</a></li> + <li><a href="/release/accumulo-1.7.3/">1.7.3</a></li> + <li><a href="/release/accumulo-1.6.6/">1.6.6</a></li> + <li><a href="/release/">Archive</a></li> + </ul> + </li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Documentation<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="/1.8/accumulo_user_manual.html">User Manual (1.8)</a></li> + <li><a href="/1.8/apidocs">Javadocs (1.8)</a></li> + <li><a href="/1.8/examples">Examples (1.8)</a></li> + <li><a href="/features">Features</a></li> + <li><a href="/glossary">Glossary</a></li> + <li><a href="/external-docs">External Docs</a></li> + <li><a href="/docs-archive/">Archive</a></li> + </ul> + </li> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Community<span class="caret"></span></a> + 
<ul class="dropdown-menu"> + <li><a href="/get_involved">Get Involved</a></li> + <li><a href="/mailing_list">Mailing Lists</a></li> + <li><a href="/people">People</a></li> + <li><a href="/related-projects">Related Projects</a></li> + <li><a href="/contributor/">Contributor Guide</a></li> + </ul> + </li> + </ul> + <ul class="nav navbar-nav navbar-right"> + <li class="dropdown"> + <a class="dropdown-toggle" data-toggle="dropdown" href="#">Apache Software Foundation<span class="caret"></span></a> + <ul class="dropdown-menu"> + <li><a href="https://www.apache.org">Apache Homepage <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/licenses/LICENSE-2.0">License <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/sponsorship">Sponsorship <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/security">Security <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/thanks">Thanks <i class="fa fa-external-link"></i></a></li> + <li><a href="https://www.apache.org/foundation/policies/conduct">Code of Conduct <i class="fa fa-external-link"></i></a></li> + </ul> + </li> + </ul> + </div> + </div> +</nav> + + <div class="container"> + <div class="row"> + <div class="col-md-12"> + + <div id="non-canonical" style="display: none; background-color: #F0E68C; padding-left: 1em;"> + Visit the official site at: <a href="https://accumulo.apache.org">https://accumulo.apache.org</a> + </div> + <div id="content"> + + <div class="alert alert-danger" role="alert">This documentation is for an unreleased version of Apache Accumulo that is currently under development! 
Check out the <a href="/docs-1.8/">documentation for the latest release</a>.</div> + +<div class="row"> + <div class="col-md-3"> + <div class="panel-group" id="accordion" role="tablist" aria-multiselectable="true"> + <div class="panel panel-default"> + + + + + + + + + + <div class="panel-heading" role="tab" id="headingOne"> + <h4 class="panel-title"> + <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsegetting-started" aria-expanded="false" aria-controls="collapsegetting-started"> + Getting started + </a> + </h4> + </div> + <div id="collapsegetting-started" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne"> + <div class="panel-body"> + + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/design">Accumulo Design</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/clients">Accumulo Clients</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/shell">Accumulo Shell</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/table_design">Table Design</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/getting-started/table_configuration">Table Configuration</a></div> + + </div> + </div> + + + + + + + + + + + + <div class="panel-heading" role="tab" id="headingOne"> + <h4 class="panel-title"> + <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsedevelopment" aria-expanded="true" aria-controls="collapsedevelopment"> + Development + </a> + </h4> + </div> + <div id="collapsedevelopment" class="panel-collapse collapse in" role="tabpanel" aria-labelledby="headingOne"> + <div class="panel-body"> + + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/iterator_design">Iterator Design</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/iterator_testing">Iterator Testing</a></div> + + <div 
class="row doc-sidebar-link"><a href="/docs/unreleased/development/development_tools">Development Tools</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/sampling">Sampling</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/summaries">Summary Statistics</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/security">Security</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/high_speed_ingest">High-Speed Ingest</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/development/analytics">Analytics</a></div> + + </div> + </div> + + + + + + + + + + + + <div class="panel-heading" role="tab" id="headingOne"> + <h4 class="panel-title"> + <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapseadministration" aria-expanded="false" aria-controls="collapseadministration"> + Administration + </a> + </h4> + </div> + <div id="collapseadministration" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne"> + <div class="panel-body"> + + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/overview">Overview</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/configuration-management">Configuration Management</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/configuration-properties">Configuration Properties</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/kerberos">Kerberos</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/replication">Replication</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/fate">FATE</a></div> + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/administration/multivolume">Multi-Volume Installations</a></div> + + <div class="row 
doc-sidebar-link"><a href="/docs/unreleased/administration/ssl">SSL</a></div> + + </div> + </div> + + + + + + + + + + + + + + + + + + + + + + <div class="panel-heading" role="tab" id="headingOne"> + <h4 class="panel-title"> + <a role="button" data-toggle="collapse" data-parent="#accordion" href="#collapsetroubleshooting" aria-expanded="false" aria-controls="collapsetroubleshooting"> + Troubleshooting + </a> + </h4> + </div> + <div id="collapsetroubleshooting" class="panel-collapse collapse" role="tabpanel" aria-labelledby="headingOne"> + <div class="panel-body"> + + + <div class="row doc-sidebar-link"><a href="/docs/unreleased/troubleshooting/overview">Overview</a></div> + + </div> + </div> + + + + </div> + </div> + </div> + <div class="col-md-9"> + + <p><a href="/docs/unreleased/">Accumulo unreleased docs</a> >> Development >> Summary Statistics</p> + + + <h1>Summary Statistics</h1> + + <h2 id="overview">Overview</h2> + +<p>Accumulo has the ability to generate summary statistics about data in a table +using user defined functions. Currently these statistics are only generated for +data written to files. Data recently written to Accumulo that is still in +memory will not contribute to summary statistics.</p> + +<p>This feature can be used to inform a user about what data is in their table. +Summary statistics can also be used by compaction strategies to make decisions +about which files to compact.</p> + +<p>Summary data is stored in each file Accumulo produces. Accumulo can gather +summary information from across a cluster merging it along the way. In order +for this to be fast, the summary information should fit in cache. There is a +dedicated cache for summary data on each tserver with a configurable size. In +order for summary data to fit in cache, it should probably be small.</p> + +<p>For information on writing a custom summarizer see the javadoc for +<code class="highlighter-rouge">org.apache.accumulo.core.client.summary.Summarizer</code>. 
The package +<code class="highlighter-rouge">org.apache.accumulo.core.client.summary.summarizers</code> contains summarizer +implementations that ship with Accumulo and can be configured for use.</p> + +<h2 id="inaccuracies">Inaccuracies</h2> + +<p>Summary data can be inaccurate when files are missing summary data or when +files have extra summary data. Files can contain data outside of a tablet's +boundaries. This can happen as a result of bulk imported files and tablet splits. +When this happens, those files could contain extra summary information. +Accumulo offsets this somewhat by storing summary information for multiple row +ranges per file. However, the ranges are not granular enough to completely +offset extra data.</p> + +<p>Any source of inaccuracies is reported when summary information is requested. +In the shell examples below this can be seen on the <code class="highlighter-rouge">File Statistics</code> line. +For files missing summary information, the compact command in the shell has a +<code class="highlighter-rouge">--sf-no-summary</code> option. This option compacts files that do not have the +summary information configured for the table. 
The compact command also has the +<code class="highlighter-rouge">--sf-extra-summary</code> option which will compact files with extra summary +information.</p> + +<h2 id="configuring">Configuring</h2> + +<p>The following tablet server and table properties configure summarization.</p> + +<ul> + <li><a href="/docs/unreleased/administration/configuration-properties#tserver_cache_summary_size">tserver.cache.summary.size</a></li> + <li><a href="/docs/unreleased/administration/configuration-properties#tserver_summary_partition_threads">tserver.summary.partition.threads</a></li> + <li><a href="/docs/unreleased/administration/configuration-properties#tserver_summary_remote_threads">tserver.summary.remote.threads</a></li> + <li><a href="/docs/unreleased/administration/configuration-properties#tserver_summary_retrieval_threads">tserver.summary.retrieval.threads</a></li> + <li><a href="/docs/unreleased/administration/configuration-properties#table_summarizer_prefix">table.summarizer.*</a></li> + <li><a href="/docs/unreleased/administration/configuration-properties#table_file_summary_maxSize">table.file.summary.maxSize</a></li> +</ul> + +<h2 id="permissions">Permissions</h2> + +<p>Because summary data may be derived from sensitive data, requesting summary data +requires a special permission. Users must have the table permission +<code class="highlighter-rouge">GET_SUMMARIES</code> in order to retrieve summary data.</p> + +<h2 id="bulk-import">Bulk import</h2> + +<p>When generating rfiles to bulk import into Accumulo, those rfiles can contain +summary data. To use this feature, look at the javadoc on the +<code class="highlighter-rouge">AccumuloFileOutputFormat.setSummarizers(...)</code> method. Also, +<code class="highlighter-rouge">org.apache.accumulo.core.client.rfile.RFile</code> has options for creating RFiles +with embedded summary data.</p> + +<h2 id="examples">Examples</h2> + +<p>This example walks through using summarizers in the Accumulo shell. 
Below a +table is created and some data is inserted to summarize.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>root@uno> createtable summary_test +root@uno summary_test> setauths -u root -s PI,GEO,TIME +root@uno summary_test> insert 3b503bd name last Doe +root@uno summary_test> insert 3b503bd name first John +root@uno summary_test> insert 3b503bd contact address "123 Park Ave, NY, NY" -l PI&GEO +root@uno summary_test> insert 3b503bd date birth "1/11/1942" -l PI&TIME +root@uno summary_test> insert 3b503bd date married "5/11/1962" -l PI&TIME +root@uno summary_test> insert 3b503bd contact home_phone 1-123-456-7890 -l PI +root@uno summary_test> insert d5d18dd contact address "50 Lake Shore Dr, Chicago, IL" -l PI&GEO +root@uno summary_test> insert d5d18dd name first Jane +root@uno summary_test> insert d5d18dd name last Doe +root@uno summary_test> insert d5d18dd date birth 8/15/1969 -l PI&TIME +root@uno summary_test> scan -s PI,GEO,TIME +3b503bd contact:address [PI&GEO] 123 Park Ave, NY, NY +3b503bd contact:home_phone [PI] 1-123-456-7890 +3b503bd date:birth [PI&TIME] 1/11/1942 +3b503bd date:married [PI&TIME] 5/11/1962 +3b503bd name:first [] John +3b503bd name:last [] Doe +d5d18dd contact:address [PI&GEO] 50 Lake Shore Dr, Chicago, IL +d5d18dd date:birth [PI&TIME] 8/15/1969 +d5d18dd name:first [] Jane +d5d18dd name:last [] Doe +</code></pre> +</div> + +<p>After inserting the data, summaries are requested below. No summaries are returned.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>root@uno summary_test> summaries +</code></pre> +</div> + +<p>The visibility summarizer is configured below and the table is flushed. +Flushing the table creates a file creating summary data in the process. The +summary data returned counts how many times each column visibility occurred. +The statistics with a <code class="highlighter-rouge">c:</code> prefix are visibilities. 
The others are generic +statistics created by the CountingSummarizer that VisibilitySummarizer extends.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>root@uno summary_test> config -t summary_test -s table.summarizer.vis=org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer +root@uno summary_test> summaries +root@uno summary_test> flush -w +2017-02-24 19:54:46,090 [shell.Shell] INFO : Flush of table summary_test completed. +root@uno summary_test> summaries +Summarizer : org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer vis {} +File Statistics : [total:1, missing:0, extra:0, large:0] +Summary Statistics : + c: = 4 + c:PI = 1 + c:PI&GEO = 2 + c:PI&TIME = 3 + emitted = 10 + seen = 10 + tooLong = 0 + tooMany = 0 +</code></pre> +</div> + +<p>VisibilitySummarizer has an option <code class="highlighter-rouge">maxCounters</code> that determines the max number +of column visibilities it will track. Below this option is set and compaction +is forced to regenerate summary data. The new summary data only has three +visibilities and now the <code class="highlighter-rouge">tooMany</code> statistic is 4. This is the number of +visibilities that were not counted.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code> root@uno summary_test> config -t summary_test -s table.summarizer.vis.opt.maxCounters=3 + root@uno summary_test> compact -w + 2017-02-24 19:54:46,267 [shell.Shell] INFO : Compacting table ... 
+ 2017-02-24 19:54:47,127 [shell.Shell] INFO : Compaction of table summary_test completed for given range + root@uno summary_test> summaries + Summarizer : org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer vis {maxCounters=3} + File Statistics : [total:1, missing:0, extra:0, large:0] + Summary Statistics : + c:PI = 1 + c:PI&GEO = 2 + c:PI&TIME = 3 + emitted = 10 + seen = 10 + tooLong = 0 + tooMany = 4 +</code></pre> +</div> + +<p>Another summarizer is configured below that tracks the number of deletes. Also +a compaction strategy that uses this summary data is configured. The +<code class="highlighter-rouge">TooManyDeletesCompactionStrategy</code> will force a compaction of the tablet when +the ratio of deletes to non-deletes is over 25%. This threshold is +configurable. Below a delete is added and it is reflected in the statistics. In +this case there is 1 delete and 10 non-deletes, not enough to force a +compaction of the tablet.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>root@uno summary_test> config -t summary_test -s table.summarizer.del=org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer +root@uno summary_test> compact -w +2017-02-24 19:54:47,282 [shell.Shell] INFO : Compacting table ... +2017-02-24 19:54:49,236 [shell.Shell] INFO : Compaction of table summary_test completed for given range +root@uno summary_test> config -t summary_test -s table.compaction.major.ratio=10 +root@uno summary_test> config -t summary_test -s table.majc.compaction.strategy=org.apache.accumulo.tserver.compaction.strategies.TooManyDeletesCompactionStrategy +root@uno summary_test> deletemany -r d5d18dd -c date -f +[DELETED] d5d18dd date:birth [PI&TIME] +root@uno summary_test> flush -w +2017-02-24 19:54:49,686 [shell.Shell] INFO : Flush of table summary_test completed. 
+root@uno summary_test> summaries + Summarizer : org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer vis {maxCounters=3} + File Statistics : [total:2, missing:0, extra:0, large:0] + Summary Statistics : + c:PI = 1 + c:PI&GEO = 2 + c:PI&TIME = 4 + emitted = 11 + seen = 11 + tooLong = 0 + tooMany = 4 + + Summarizer : org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer del {} + File Statistics : [total:2, missing:0, extra:0, large:0] + Summary Statistics : + deletes = 1 + total = 11 +</code></pre> +</div> + +<p>Some more deletes are added and the table is flushed below. This results in 4 +deletes and 10 non-deletes, which triggers a full compaction. A full +compaction of all files is the only time when delete markers are dropped. The +compaction ratio was set to 10 above to show that the number of files did not +trigger the compaction. After the compaction there are no deletes and 6 non-deletes.</p> + +<div class="highlighter-rouge"><pre class="highlight"><code>root@uno summary_test> deletemany -r d5d18dd -f +[DELETED] d5d18dd contact:address [PI&GEO] +[DELETED] d5d18dd name:first [] +[DELETED] d5d18dd name:last [] +root@uno summary_test> flush -w +2017-02-24 19:54:52,800 [shell.Shell] INFO : Flush of table summary_test completed. 
+root@uno summary_test> summaries + Summarizer : org.apache.accumulo.core.client.summary.summarizers.VisibilitySummarizer vis {maxCounters=3} + File Statistics : [total:1, missing:0, extra:0, large:0] + Summary Statistics : + c:PI = 1 + c:PI&GEO = 1 + c:PI&TIME = 2 + emitted = 6 + seen = 6 + tooLong = 0 + tooMany = 2 + + Summarizer : org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer del {} + File Statistics : [total:1, missing:0, extra:0, large:0] + Summary Statistics : + deletes = 0 + total = 6 +root@uno summary_test> +</code></pre> +</div> + + + </div> +</div> + + </div> + + +<footer> + + <p><a href="https://www.apache.org/foundation/contributing"><img src="https://www.apache.org/images/SupportApache-small.png" alt="Support the ASF" id="asf-logo" height="100" /></a></p> + + <p>Copyright © 2011-2017 The Apache Software Foundation. Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.</p> + +</footer> + + + </div> + </div> + </div> +</body> +</html>