ACCUMULO-4612 Simplify Accumulo memory configuration * The MEMORY property type now supports percentage of max JVM memory for settings. Several properties now use a percentage for their default, which allows the default to work for various memory environments. * Created new BYTES property type which only supports fixed memory * Removed 'accumulo create-config' command as it is no longer needed. * Removed default classpath settings from 'general.classpaths' property in accumulo-site.xml and deprecated the property. Users can now configure the CLASSPATH variable in accumulo-env.sh.
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/f159ec1f Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/f159ec1f Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/f159ec1f Branch: refs/heads/master Commit: f159ec1fd23d0f4bad34f22c72ee52dad1f9bfc9 Parents: c2778b8 Author: Mike Walch <mwa...@apache.org> Authored: Mon Mar 20 16:36:45 2017 -0400 Committer: Mike Walch <mwa...@apache.org> Committed: Fri Mar 24 13:01:07 2017 -0400 ---------------------------------------------------------------------- INSTALL.md | 92 ++-- assemble/bin/accumulo | 21 +- assemble/bin/accumulo-util | 433 ++----------------- assemble/conf/accumulo-env.sh | 117 +++++ assemble/conf/accumulo-site.xml | 51 +++ assemble/conf/templates/accumulo-env.sh | 113 ----- assemble/conf/templates/accumulo-site.xml | 175 -------- assemble/pom.xml | 10 - .../main/scripts/generate-example-configs.sh | 23 - .../apache/accumulo/core/cli/ClientOpts.java | 2 +- .../core/client/rfile/RFileScanner.java | 2 +- .../accumulo/core/compaction/SizeType.java | 2 +- .../core/conf/AccumuloConfiguration.java | 56 ++- .../org/apache/accumulo/core/conf/Property.java | 61 +-- .../apache/accumulo/core/conf/PropertyType.java | 16 +- .../apache/accumulo/core/file/rfile/RFile.java | 2 +- .../core/file/rfile/RFileOperations.java | 6 +- .../accumulo/core/file/rfile/SplitLarge.java | 2 +- .../iterators/user/RowEncodingIterator.java | 4 +- .../iterators/user/TransformingIterator.java | 4 +- .../security/crypto/CryptoModuleFactory.java | 2 +- .../accumulo/core/summary/SummaryWriter.java | 2 +- .../org/apache/accumulo/core/util/Merge.java | 2 +- .../mapred/AccumuloFileOutputFormatTest.java | 12 +- .../mapreduce/AccumuloFileOutputFormatTest.java | 12 +- .../core/conf/AccumuloConfigurationTest.java | 52 ++- .../accumulo/core/conf/PropertyTypeTest.java | 8 +- .../main/asciidoc/chapters/administration.txt | 38 +- 
.../java/org/apache/accumulo/proxy/Proxy.java | 2 +- .../accumulo/server/rpc/TServerUtils.java | 2 +- .../tabletserver/LargestFirstMemoryManager.java | 2 +- .../accumulo/gc/SimpleGarbageCollector.java | 2 +- .../apache/accumulo/tserver/TabletServer.java | 6 +- .../tserver/TabletServerResourceManager.java | 14 +- .../compaction/SizeLimitCompactionStrategy.java | 2 +- .../compaction/TwoTierCompactionStrategy.java | 2 +- .../apache/accumulo/tserver/log/DfsLogger.java | 4 +- .../apache/accumulo/tserver/log/LogSorter.java | 4 +- .../replication/AccumuloReplicaSystem.java | 2 +- .../BatchWriterReplicationReplayer.java | 2 +- .../accumulo/tserver/scan/LookupTask.java | 2 +- .../tserver/tablet/DatafileManager.java | 4 +- .../apache/accumulo/tserver/tablet/Tablet.java | 8 +- .../tserver/LargestFirstMemoryManagerTest.java | 6 +- .../SizeLimitCompactionStrategyTest.java | 2 +- .../TwoTierCompactionStrategyTest.java | 2 +- .../ConfigurableCompactionStrategyTest.java | 6 +- .../BatchWriterReplicationReplayerTest.java | 4 +- .../tserver/tablet/DatafileManagerTest.java | 2 +- .../accumulo/shell/commands/MergeCommand.java | 2 +- .../start/classloader/AccumuloClassLoader.java | 38 +- .../classloader/vfs/AccumuloClasspathTest.java | 6 +- .../apache/accumulo/test/LargeSplitRowIT.java | 6 +- test/src/main/resources/conf/accumulo-site.xml | 107 ----- 54 files changed, 465 insertions(+), 1094 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/INSTALL.md ---------------------------------------------------------------------- diff --git a/INSTALL.md b/INSTALL.md index c7f06db..eac6408 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -36,62 +36,49 @@ to manage Accumulo: These scripts will be used in the remaining instructions to configure and run Accumulo. For convenience, consider adding `accumulo-X.Y.Z/bin/` to your shell's path. 
-## Configuring +## Configuring Accumulo -Accumulo has some optional native code that improves its performance and -stability. Before configuring Accumulo, attempt to build this native code -with the following command. +Accumulo requires running [Zookeeper][3] and [HDFS][4] instances which should be set up +before configuring Accumulo. - accumulo-util build-native +The primary configuration files for Accumulo are `accumulo-env.sh` and `accumulo-site.xml` +which are located in the `conf/` directory. -If the command fails, its OK to continue with setup and resolve the issue later. +Follow the steps below to configure `accumulo-site.xml`: -Accumulo is configured by the files `accumulo-site.xml` and `accumulo-env.sh` in the `conf/` -directory. You can either edit these files for your environment or run the command below which will -overwrite them with files configured for your environment. +1. Run `accumulo-util build-native` to build native code. If this command fails, disable + native maps by setting `tserver.memory.maps.native.enabled` to `false`. - accumulo-util create-config +2. Set `instance.volumes` to HDFS location where Accumulo will store data. If your namenode + is running at 192.168.1.9:8020 and you want to store data in `/accumulo` in HDFS, then set + `instance.volumes` to `hdfs://192.168.1.9:8020/accumulo`. -The script will ask you questions about your set up. Below are some suggestions: +3. Set `instance.zookeeper.host` to the location of your Zookeepers -* When the script asks about memory-map type, choose Native if the build native script - was successful. Otherwise, choose Java. -* The script will prompt for memory usage. Please note that the footprints are - only for the Accumulo system processes, so ample space should be left for other - processes like Hadoop, Zookeeper, and the Accumulo client code. If Accumulo - worker processes are swapped out and unresponsive, they may be killed. +4. 
(Optional) Change `instance.secret` (which is used by Accumulo processes to communicate) + from the default. This value should match on all servers. -While `accumulo-util create-config` creates `accumulo-env.sh` and `accumulo-site.xml` files -targeted for your environment, these files still require a few more edits before starting Accumulo. +Follow the steps below to configure `accumulo-env.sh`: -### Secret +1. Set `HADOOP_PREFIX` and `ZOOKEEPER_HOME` to the location of your Hadoop and Zookeeper + installations. Accumulo will use these locations to find Hadoop and Zookeeper jars and add + them to your `CLASSPATH` variable. If you are running a vendor-specific release of + Hadoop or Zookeeper, you may need to modify how the `CLASSPATH` variable is built in + `accumulo-env.sh`. If Accumulo has problems loading classes when you start it, run + `accumulo classpath -d` to debug and print Accumulo's classpath. -Accumulo coordination and worker processes can only communicate with each other -if they share the same secret key. To change the secret key set -`instance.secret` in `accumulo-site.xml`. Changing this secret key from -the default is highly recommended. +2. Accumulo tablet servers are configured by default to use 1GB of memory (768MB is allocated to + JVM and 256MB is allocated for native maps). Native maps are allocated memory equal to 33% of + the tserver JVM heap. The table below can be used if you would like to change tserver memory + usage in the `JAVA_OPTS` section of `accumulo-env.sh`: -### Dependencies + | Native? | 512MB | 1GB | 2GB | 3GB | + |---------|-------------------|-------------------|---------------------|---------------| + | Yes | -Xmx384m -Xms384m | -Xmx768m -Xms768m | -Xmx1536m -Xms1536m | -Xmx2g -Xms2g | + | No | -Xmx512m -Xms512m | -Xmx1g -Xms1g | -Xmx2g -Xms2g | -Xmx3g -Xms3g | -Accumulo requires running [Zookeeper][3] and [HDFS][4] instances. Also, the -Accumulo binary distribution does not include jars for Zookeeper and Hadoop.
-When configuring Accumulo the following information about these dependencies -must be provided. - - * **Location of Zookeepers** : Provide this by setting `instance.zookeeper.host` - in `accumulo-site.xml`. - * **Where to store data** : Provide this by setting `instance.volumes` in - `accumulo-site.xml`. If your namenode is running at 192.168.1.9:9000 - and you want to store data in `/accumulo` in HDFS, then set - `instance.volumes` to `hdfs://192.168.1.9:9000/accumulo`. - * **Location of Zookeeper and Hadoop jars** : Setting `ZOOKEEPER_HOME` and - `HADOOP_PREFIX` in `accumulo-env.sh` will help Accumulo find these jars - when using the default setting for `general.classpaths` in accumulo-site.xml. - -If Accumulo has problems later on finding jars, then run `bin/accumulo -classpath` to print out info about where Accumulo is finding jars. If the -settings mentioned above are correct, then inspect `general.classpaths` in -`accumulo-site.xml`. +3. (Optional) Review the memory settings for the Accumulo master, garbage collector, and monitor + in the `JAVA_OPTS` section of `accumulo-env.sh`. ## Initialization @@ -112,9 +99,12 @@ The initialization command will prompt for the following information. There are several methods for running Accumulo: -1. Run individual Accumulo services using `accumulo-service`. Useful if you are - using a cluster management tool (i.e Ansible, Salt, etc) or init.d scripts to - start Accumulo. +1. Run Accumulo processes using `accumulo` command which runs processes in foreground and + will not redirect stderr/stdout. Useful for creating init.d scripts that run Accumulo. + +2. Run Accumulo processes as services using `accumulo-service` which uses `accumulo` + command but backgrounds processes, redirects stderr/stdout and manages pid files. + Useful if you are using a cluster management tool (i.e Ansible, Salt, etc). 2. Run an Accumulo cluster on one or more nodes using `accumulo-cluster` (which uses `accumulo-service` to run services). 
Useful for local development and @@ -122,6 +112,14 @@ There are several methods for running Accumulo: Each method above has instructions below. +### Run Accumulo processes + +Start Accumulo processes (tserver, master, monitor, etc) using command below: + + accumulo tserver + +The process will run in the foreground. Use ctrl-c to quit. + ### Run Accumulo services Start Accumulo services (tserver, master, monitor, etc) using command below: http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/assemble/bin/accumulo ---------------------------------------------------------------------- diff --git a/assemble/bin/accumulo b/assemble/bin/accumulo index 5dd96e2..6c8b22c 100755 --- a/assemble/bin/accumulo +++ b/assemble/bin/accumulo @@ -15,19 +15,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -function verify_env_dir() { - property=$1 - directory=$2 - if [[ -z "$directory" ]]; then - echo "$property is not set. Please make sure it's set globally or in $conf/accumulo-env.sh" - exit 1 - fi - if [[ ! -d "$directory" ]]; then - echo "$property=$directory is not a valid directory. Please make sure it's set correctly globally or in $conf/accumulo-env.sh" - exit 1 - fi -} - function main() { SOURCE="${BASH_SOURCE[0]}" @@ -49,7 +36,6 @@ function main() { fi if [[ ! -f $conf/accumulo-env.sh || ! -f $conf/accumulo-site.xml ]]; then echo "The configuration files 'accumulo-env.sh' & 'accumulo-site.xml' must exist in $conf" - echo "Run 'accumulo-util create-config' to create them and edit them for your environment."
exit 1 fi source "$conf/accumulo-env.sh" @@ -60,11 +46,9 @@ function main() { # Verify setting in accumulo-env.sh : "${JAVA_OPTS:?"variable is not set in accumulo-env.sh"}" + : "${CLASSPATH:?"variable is not set in accumulo-env.sh"}" : "${ACCUMULO_LOG_DIR:?"variable is not set in accumulo-env.sh"}" mkdir -p "${ACCUMULO_LOG_DIR}" 2>/dev/null - verify_env_dir "HADOOP_PREFIX" "${HADOOP_PREFIX}" - verify_env_dir "HADOOP_CONF_DIR" "${HADOOP_CONF_DIR}" - verify_env_dir "ZOOKEEPER_HOME" "${ZOOKEEPER_HOME}" : "${MALLOC_ARENA_MAX:?"variable is not set in accumulo-env.sh"}" if [ -x "$JAVA_HOME/bin/java" ]; then @@ -80,9 +64,6 @@ function main() { JAVA=($ACCUMULO_JAVA_PREFIX $JAVA) fi - CLASSPATH="${conf}:${lib}/*:${CLASSPATH}" - export CLASSPATH - exec "${JAVA[@]}" "${JAVA_OPTS[@]}" org.apache.accumulo.start.Main "$@" } http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/assemble/bin/accumulo-util ---------------------------------------------------------------------- diff --git a/assemble/bin/accumulo-util b/assemble/bin/accumulo-util index 995b312..9dc7a9d 100755 --- a/assemble/bin/accumulo-util +++ b/assemble/bin/accumulo-util @@ -20,7 +20,6 @@ function print_usage { Usage: accumulo-util <command> (<argument> ...) 
Commands: - create-config Creates Accumulo configuration build-native Builds Accumulo native libraries hadoop-jar Runs 'hadoop jar' command with Accumulo jars gen-monitor-cert Generates Accumulo monitor certficate @@ -30,386 +29,6 @@ EOF exit 1 } -function create_config_usage() { - cat <<EOF -Usage: accumulo-util create-config [-options] -where options include (long options not available on all platforms): - -d, --dir Alternate directory to setup config files - -s, --size Supported sizes: '1GB' '2GB' '3GB' '512MB' - -n, --native Configure to use native libraries - -j, --jvm Configure to use the jvm - -o, --overwrite Overwrite the default config directory - -v, --version Specify the Apache Hadoop version supported versions: '1' '2' - -k, --kerberos Configure for use with Kerberos - -h, --help Print this help message -EOF -} - -function create_config() { - TEMPLATE_CONF_DIR="${conf}/templates" - CONF_DIR="${conf}" - ACCUMULO_SITE=accumulo-site.xml - ACCUMULO_ENV=accumulo-env.sh - - SIZE= - TYPE= - HADOOP_VERSION= - OVERWRITE="0" - BASE_DIR= - KERBEROS= - - #Execute getopt - if [[ $(uname -s) == "Linux" ]]; then - args=$(getopt -o "b:d:s:njokv:h" -l "basedir:,dir:,size:,native,jvm,overwrite,kerberos,version:,help" -q -- "$@") - else # Darwin, BSD - args=$(getopt b:d:s:njokv:h "$@") - fi - - #Bad arguments - if [[ $? 
!= 0 ]]; then - create_config_usage 1>&2 - exit 1 - fi - eval set -- "${args[@]}" - - for i - do - case "$i" in - -b|--basedir) #Hidden option used to set general.maven.project.basedir for developers - BASE_DIR=$2; shift - shift;; - -d|--dir) - CONF_DIR=$2; shift - shift;; - -s|--size) - SIZE=$2; shift - shift;; - -n|--native) - TYPE=native - shift;; - -j|--jvm) - TYPE=jvm - shift;; - -o|--overwrite) - OVERWRITE=1 - shift;; - -v|--version) - HADOOP_VERSION=$2; shift - shift;; - -k|--kerberos) - KERBEROS="true" - shift;; - -h|--help) - create_config_usage - exit 0 - shift;; - --) - shift - break;; - esac - done - - while [[ "${OVERWRITE}" = "0" ]]; do - if [[ -e "${CONF_DIR}/${ACCUMULO_ENV}" || -e "${CONF_DIR}/${ACCUMULO_SITE}" ]]; then - echo "Warning your current config files in ${CONF_DIR} will be overwritten!" - echo - echo "How would you like to proceed?:" - select CHOICE in 'Continue with overwrite' 'Specify new conf dir'; do - if [[ "${CHOICE}" = 'Specify new conf dir' ]]; then - echo -n "Please specifiy new conf directory: " - read CONF_DIR - elif [[ "${CHOICE}" = 'Continue with overwrite' ]]; then - OVERWRITE=1 - fi - break - done - else - OVERWRITE=1 - fi - done - echo "Copying configuration files to: ${CONF_DIR}" - - #Native 1GB - native_1GB_tServer="'-Xmx128m' '-Xms128m'" - _1GB_master="'-Xmx128m' '-Xms128m'" - _1GB_monitor="'-Xmx64m' '-Xms64m'" - _1GB_gc="'-Xmx64m' '-Xms64m'" - _1GB_other="'-Xmx128m' '-Xms64m'" - _1GB_shell="${_1GB_other}" - - _1GB_memoryMapMax="256M" - native_1GB_nativeEnabled="true" - _1GB_cacheDataSize="15M" - _1GB_cacheIndexSize="40M" - _1GB_sortBufferSize="50M" - _1GB_waLogMaxSize="256M" - - #Native 2GB - native_2GB_tServer="'-Xmx256m' '-Xms256m'" - _2GB_master="'-Xmx256m' '-Xms256m'" - _2GB_monitor="'-Xmx128m' '-Xms64m'" - _2GB_gc="'-Xmx128m' '-Xms128m'" - _2GB_other="'-Xmx256m' '-Xms64m'" - _2GB_shell="${_2GB_other}" - - _2GB_memoryMapMax="512M" - native_2GB_nativeEnabled="true" - _2GB_cacheDataSize="30M" - 
_2GB_cacheIndexSize="80M" - _2GB_sortBufferSize="50M" - _2GB_waLogMaxSize="512M" - - #Native 3GB - native_3GB_tServer="'-Xmx1g' '-Xms1g' '-XX:NewSize=500m' '-XX:MaxNewSize=500m'" - _3GB_master="'-Xmx1g' '-Xms1g'" - _3GB_monitor="'-Xmx1g' '-Xms256m'" - _3GB_gc="'-Xmx256m' '-Xms256m'" - _3GB_other="'-Xmx1g' '-Xms256m'" - _3GB_shell="${_3GB_other}" - - _3GB_memoryMapMax="1G" - native_3GB_nativeEnabled="true" - _3GB_cacheDataSize="128M" - _3GB_cacheIndexSize="128M" - _3GB_sortBufferSize="200M" - _3GB_waLogMaxSize="1G" - - #Native 512MB - native_512MB_tServer="'-Xmx48m' '-Xms48m'" - _512MB_master="'-Xmx128m' '-Xms128m'" - _512MB_monitor="'-Xmx64m' '-Xms64m'" - _512MB_gc="'-Xmx64m' '-Xms64m'" - _512MB_other="'-Xmx128m' '-Xms64m'" - _512MB_shell="${_512MB_other}" - - _512MB_memoryMapMax="80M" - native_512MB_nativeEnabled="true" - _512MB_cacheDataSize="7M" - _512MB_cacheIndexSize="20M" - _512MB_sortBufferSize="50M" - _512MB_waLogMaxSize="100M" - - #JVM 1GB - jvm_1GB_tServer="'-Xmx384m' '-Xms384m'" - - jvm_1GB_nativeEnabled="false" - - #JVM 2GB - jvm_2GB_tServer="'-Xmx768m' '-Xms768m'" - - jvm_2GB_nativeEnabled="false" - - #JVM 3GB - jvm_3GB_tServer="'-Xmx2g' '-Xms2g' '-XX:NewSize=1G' '-XX:MaxNewSize=1G'" - - jvm_3GB_nativeEnabled="false" - - #JVM 512MB - jvm_512MB_tServer="'-Xmx128m' '-Xms128m'" - - jvm_512MB_nativeEnabled="false" - - - if [[ -z "${SIZE}" ]]; then - echo "Choose the heap configuration:" - select DIRNAME in 1GB 2GB 3GB 512MB; do - echo "Using '${DIRNAME}' configuration" - SIZE=${DIRNAME} - break - done - elif [[ "${SIZE}" != "1GB" && "${SIZE}" != "2GB" && "${SIZE}" != "3GB" && "${SIZE}" != "512MB" ]]; then - echo "Invalid memory size" - echo "Supported sizes: '1GB' '2GB' '3GB' '512MB'" - exit 1 - fi - - if [[ -z "${TYPE}" ]]; then - echo - echo "Choose the Accumulo memory-map type:" - select TYPENAME in Java Native; do - if [[ "${TYPENAME}" == "Native" ]]; then - TYPE="native" - echo "Don't forget to build the native libraries using the command 
'accumulo-util build-native'" - elif [[ "${TYPENAME}" == "Java" ]]; then - TYPE="jvm" - fi - echo "Using '${TYPE}' configuration" - echo - break - done - fi - - if [[ -z "${HADOOP_VERSION}" ]]; then - echo - echo "Choose the Apache Hadoop version:" - select HADOOP in 'Hadoop 2' 'HDP 2.0/2.1' 'HDP 2.2' 'IOP 4.1'; do - if [ "${HADOOP}" == "Hadoop 2" ]; then - HADOOP_VERSION="2" - elif [ "${HADOOP}" == "HDP 2.0/2.1" ]; then - HADOOP_VERSION="HDP2" - elif [ "${HADOOP}" == "HDP 2.2" ]; then - HADOOP_VERSION="HDP2.2" - elif [ "${HADOOP}" == "IOP 4.1" ]; then - HADOOP_VERSION="IOP4.1" - fi - echo "Using Hadoop version '${HADOOP_VERSION}' configuration" - echo - break - done - elif [[ "${HADOOP_VERSION}" != "2" && "${HADOOP_VERSION}" != "HDP2" && "${HADOOP_VERSION}" != "HDP2.2" ]]; then - echo "Invalid Hadoop version" - echo "Supported Hadoop versions: '2', 'HDP2', 'HDP2.2'" - exit 1 - fi - - TRACE_USER="root" - - if [[ ! -z "${KERBEROS}" ]]; then - echo - read -p "Enter server's Kerberos principal: " PRINCIPAL - read -p "Enter server's Kerberos keytab: " KEYTAB - TRACE_USER="${PRINCIPAL}" - fi - - for var in SIZE TYPE HADOOP_VERSION; do - if [[ -z ${!var} ]]; then - echo "Invalid $var configuration" - exit 1 - fi - done - - TSERVER="${TYPE}_${SIZE}_tServer" - MASTER="_${SIZE}_master" - MONITOR="_${SIZE}_monitor" - GC="_${SIZE}_gc" - SHELL="_${SIZE}_shell" - OTHER="_${SIZE}_other" - - MEMORY_MAP_MAX="_${SIZE}_memoryMapMax" - NATIVE="${TYPE}_${SIZE}_nativeEnabled" - CACHE_DATA_SIZE="_${SIZE}_cacheDataSize" - CACHE_INDEX_SIZE="_${SIZE}_cacheIndexSize" - SORT_BUFFER_SIZE="_${SIZE}_sortBufferSize" - WAL_MAX_SIZE="_${SIZE}_waLogMaxSize" - - MAVEN_PROJ_BASEDIR="" - - if [[ ! 
-z "${BASE_DIR}" ]]; then - MAVEN_PROJ_BASEDIR="\n <property>\n <name>general.maven.project.basedir</name>\n <value>${BASE_DIR}</value>\n </property>\n" - fi - - mkdir -p "${CONF_DIR}" && cp "${TEMPLATE_CONF_DIR}"/{$ACCUMULO_SITE,$ACCUMULO_ENV} "${CONF_DIR}"/ - - #Configure accumulo-env.sh - sed -e "s/\${tServerHigh_tServerLow}/${!TSERVER}/" \ - -e "s/\${masterHigh_masterLow}/${!MASTER}/" \ - -e "s/\${monitorHigh_monitorLow}/${!MONITOR}/" \ - -e "s/\${gcHigh_gcLow}/${!GC}/" \ - -e "s/\${shellHigh_shellLow}/${!SHELL}/" \ - -e "s/\${otherHigh_otherLow}/${!OTHER}/" \ - "${TEMPLATE_CONF_DIR}/$ACCUMULO_ENV" > "${CONF_DIR}/$ACCUMULO_ENV" - - #Configure accumulo-site.xml - sed -e "s/\${memMapMax}/${!MEMORY_MAP_MAX}/" \ - -e "s/\${nativeEnabled}/${!NATIVE}/" \ - -e "s/\${cacheDataSize}/${!CACHE_DATA_SIZE}/" \ - -e "s/\${cacheIndexSize}/${!CACHE_INDEX_SIZE}/" \ - -e "s/\${sortBufferSize}/${!SORT_BUFFER_SIZE}/" \ - -e "s/\${waLogMaxSize}/${!WAL_MAX_SIZE}/" \ - -e "s=\${traceUser}=${TRACE_USER}=" \ - -e "s=\${mvnProjBaseDir}=${MAVEN_PROJ_BASEDIR}=" "${TEMPLATE_CONF_DIR}/$ACCUMULO_SITE" > "${CONF_DIR}/$ACCUMULO_SITE" - - # If we're not using kerberos, filter out the krb properties - if [[ -z "${KERBEROS}" ]]; then - sed -e 's/<!-- Kerberos requirements -->/<!-- Kerberos requirements --><!--/' \ - -e 's/<!-- End Kerberos requirements -->/--><!-- End Kerberos requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - else - # Make the substitutions - sed -e "s!\${keytab}!${KEYTAB}!" \ - -e "s!\${principal}!${PRINCIPAL}!" 
\ - "${CONF_DIR}/${ACCUMULO_SITE}" > temp - mv temp "${CONF_DIR}/${ACCUMULO_SITE}" - fi - - # Configure hadoop version - if [[ "${HADOOP_VERSION}" == "2" ]]; then - sed -e 's/<!-- HDP 2.0 requirements -->/<!-- HDP 2.0 requirements --><!--/' \ - -e 's/<!-- End HDP 2.0 requirements -->/--><!-- End HDP 2.0 requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - sed -e 's/<!-- HDP 2.2 requirements -->/<!-- HDP 2.2 requirements --><!--/' \ - -e 's/<!-- End HDP 2.2 requirements -->/--><!-- End HDP 2.2 requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - sed -e 's/<!-- IOP 4.1 requirements -->/<!-- IOP 4.1 requirements --><!--/' \ - -e 's/<!-- End IOP 4.1 requirements -->/--><!-- End IOP 4.1 requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - elif [[ "${HADOOP_VERSION}" == "HDP2" ]]; then - sed -e 's/<!-- Hadoop 2 requirements -->/<!-- Hadoop 2 requirements --><!--/' \ - -e 's/<!-- End Hadoop 2 requirements -->/--><!-- End Hadoop 2 requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - sed -e 's/<!-- HDP 2.2 requirements -->/<!-- HDP 2.2 requirements --><!--/' \ - -e 's/<!-- End HDP 2.2 requirements -->/--><!-- End HDP 2.2 requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - sed -e 's/<!-- IOP 4.1 requirements -->/<!-- IOP 4.1 requirements --><!--/' \ - -e 's/<!-- End IOP 4.1 requirements -->/--><!-- End IOP 4.1 requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - elif [[ "${HADOOP_VERSION}" == "HDP2.2" ]]; then - sed -e 's/<!-- Hadoop 2 requirements -->/<!-- Hadoop 2 requirements --><!--/' \ - -e 's/<!-- End Hadoop 2 requirements -->/--><!-- End Hadoop 2 requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - sed -e 's/<!-- HDP 2.0 requirements -->/<!-- 
HDP 2.0 requirements --><!--/' \ - -e 's/<!-- End HDP 2.0 requirements -->/--><!-- End HDP 2.0 requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - sed -e 's/<!-- IOP 4.1 requirements -->/<!-- IOP 4.1 requirements --><!--/' \ - -e 's/<!-- End IOP 4.1 requirements -->/--><!-- End IOP 4.1 requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - elif [[ "${HADOOP_VERSION}" == "IOP4.1" ]]; then - sed -e 's/<!-- Hadoop 2 requirements -->/<!-- Hadoop 2 requirements --><!--/' \ - -e 's/<!-- End Hadoop 2 requirements -->/--><!-- End Hadoop 2 requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - sed -e 's/<!-- HDP 2.0 requirements -->/<!-- HDP 2.0 requirements --><!--/' \ - -e 's/<!-- End HDP 2.0 requirements -->/--><!-- End HDP 2.0 requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - sed -e 's/<!-- HDP 2.2 requirements -->/<!-- HDP 2.2 requirements --><!--/' \ - -e 's/<!-- End HDP 2.2 requirements -->/--><!-- End HDP 2.2 requirements -->/' \ - "${CONF_DIR}/$ACCUMULO_SITE" > temp - mv temp "${CONF_DIR}/$ACCUMULO_SITE" - fi - - #Additional setup steps for native configuration. 
- if [[ ${TYPE} == native ]]; then - if [[ $(uname) == Linux ]]; then - if [[ -z $HADOOP_PREFIX ]]; then - echo "WARNING: HADOOP_PREFIX not set, cannot automatically configure LD_LIBRARY_PATH to include Hadoop native libraries" - else - NATIVE_LIB=$(readlink -ef "$(dirname "$(for x in $(find "$HADOOP_PREFIX" -name libhadoop.so); do ld "$x" 2>/dev/null && echo "$x" && break; done)" 2>>/dev/null)" 2>>/dev/null) - if [[ -z $NATIVE_LIB ]]; then - echo -e "WARNING: The Hadoop native libraries could not be found for your sytem in: $HADOOP_PREFIX" - else - sed "/# Should the monitor/ i export LD_LIBRARY_PATH=${NATIVE_LIB}:\${LD_LIBRARY_PATH}" "${CONF_DIR}/$ACCUMULO_ENV" > temp - mv temp "${CONF_DIR}/$ACCUMULO_ENV" - echo -e "Added ${NATIVE_LIB} to the LD_LIBRARY_PATH" - fi - fi - fi - echo -e "Please remember to compile the Accumulo native libraries using the command 'accumulo-util build-native' and to set the LD_LIBRARY_PATH variable in the ${CONF_DIR}/accumulo-env.sh if needed." - fi - - echo "Setup complete" -} - function build_native() { final_native_target="$basedir/lib/native" if [ -f "$final_native_target/libaccumulo.so" -o -f "$final_native_target/libaccumulo.dylib" ]; then @@ -516,9 +135,15 @@ function gen_monitor_cert() { } function load_jars_hdfs() { - if [ -z "$HADOOP_PREFIX" ]; then - echo "HADOOP_PREFIX is not set!" - exit 1 + + if [ -x "$HADOOP_PREFIX/bin/hadoop" ]; then + HADOOP="$HADOOP_PREFIX/bin/hadoop" + else + HADOOP=$(which hadoop) + fi + if [ ! -x "$HADOOP" ]; then + echo "Could not find 'hadoop' command. Please set hadoop on your PATH or set HADOOP_PREFIX" + exit 1 fi # Find the system context directory in HDFS @@ -540,9 +165,9 @@ function load_jars_hdfs() { fi # Create the system context directy in HDFS if it does not exist - "$HADOOP_PREFIX/bin/hadoop" fs -ls "$SYSTEM_CONTEXT_HDFS_DIR" > /dev/null + "$HADOOP" fs -ls "$SYSTEM_CONTEXT_HDFS_DIR" > /dev/null if [[ $? 
!= 0 ]]; then - "$HADOOP_PREFIX/bin/hadoop" fs -mkdir "$SYSTEM_CONTEXT_HDFS_DIR" > /dev/null + "$HADOOP" fs -mkdir "$SYSTEM_CONTEXT_HDFS_DIR" > /dev/null if [[ $? != 0 ]]; then echo "Unable to create classpath directory at $SYSTEM_CONTEXT_HDFS_DIR" exit 1 @@ -558,22 +183,27 @@ function load_jars_hdfs() { (( REP < 3 )) && REP=3 # Copy all jars in lib to the system context directory - "$HADOOP_PREFIX/bin/hadoop" fs -moveFromLocal "$lib"/*.jar "$SYSTEM_CONTEXT_HDFS_DIR" > /dev/null - "$HADOOP_PREFIX/bin/hadoop" fs -setrep -R $REP "$SYSTEM_CONTEXT_HDFS_DIR" > /dev/null + "$HADOOP" fs -moveFromLocal "$lib"/*.jar "$SYSTEM_CONTEXT_HDFS_DIR" > /dev/null + "$HADOOP" fs -setrep -R $REP "$SYSTEM_CONTEXT_HDFS_DIR" > /dev/null # We need some of the jars in lib, copy them back out and remove them from the system context dir - "$HADOOP_PREFIX/bin/hadoop" fs -copyToLocal "$SYSTEM_CONTEXT_HDFS_DIR/commons-vfs2.jar" "$lib/." > /dev/null - "$HADOOP_PREFIX/bin/hadoop" fs -rm "$SYSTEM_CONTEXT_HDFS_DIR/commons-vfs2.jar" > /dev/null - "$HADOOP_PREFIX/bin/hadoop" fs -copyToLocal "$SYSTEM_CONTEXT_HDFS_DIR/accumulo-start.jar" "$lib/." > /dev/null - "$HADOOP_PREFIX/bin/hadoop" fs -rm "$SYSTEM_CONTEXT_HDFS_DIR/accumulo-start.jar" > /dev/null - "$HADOOP_PREFIX/bin/hadoop" fs -copyToLocal "$SYSTEM_CONTEXT_HDFS_DIR/slf4j*.jar" "$lib/." > /dev/null - "$HADOOP_PREFIX/bin/hadoop" fs -rm "$SYSTEM_CONTEXT_HDFS_DIR/slf4j*.jar" > /dev/null + "$HADOOP" fs -copyToLocal "$SYSTEM_CONTEXT_HDFS_DIR/commons-vfs2.jar" "$lib/." > /dev/null + "$HADOOP" fs -rm "$SYSTEM_CONTEXT_HDFS_DIR/commons-vfs2.jar" > /dev/null + "$HADOOP" fs -copyToLocal "$SYSTEM_CONTEXT_HDFS_DIR/accumulo-start.jar" "$lib/." > /dev/null + "$HADOOP" fs -rm "$SYSTEM_CONTEXT_HDFS_DIR/accumulo-start.jar" > /dev/null + "$HADOOP" fs -copyToLocal "$SYSTEM_CONTEXT_HDFS_DIR/slf4j*.jar" "$lib/." 
> /dev/null + "$HADOOP" fs -rm "$SYSTEM_CONTEXT_HDFS_DIR/slf4j*.jar" > /dev/null } function hadoop_jar() { - if [ -z "$HADOOP_PREFIX" ]; then - echo "HADOOP_PREFIX must be set!" - exit 1 + if [ -x "$HADOOP_PREFIX/bin/hadoop" ]; then + HADOOP="$HADOOP_PREFIX/bin/hadoop" + else + HADOOP=$(which hadoop) + fi + if [ ! -x "$HADOOP" ]; then + echo "Could not find 'hadoop' command. Please set hadoop on your PATH or set HADOOP_PREFIX" + exit 1 fi if [ -z "$ZOOKEEPER_HOME" ]; then echo "ZOOKEEPER_HOME must be set!" @@ -628,11 +258,7 @@ function hadoop_jar() { exit 1 fi - #echo USERJARS=$USERJARS - #echo CLASSNAME=$CLASSNAME - #echo HADOOP_CLASSPATH=$HADOOP_CLASSPATH - #echo exec "$HADOOP_PREFIX/bin/hadoop" jar "$TOOLJAR" "$CLASSNAME" -libjars \"$LIB_JARS\" $ARGS - exec "$HADOOP_PREFIX/bin/hadoop" jar "$TOOLJAR" "$CLASSNAME" -libjars "$LIB_JARS" "$@" + exec "$HADOOP" jar "$TOOLJAR" "$CLASSNAME" -libjars "$LIB_JARS" "$@" } function main() { @@ -648,9 +274,6 @@ function main() { lib="${basedir}/lib" case "$1" in - create-config) - create_config "${@:2}" - ;; build-native) build_native "${@:2}" ;; http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/assemble/conf/accumulo-env.sh ---------------------------------------------------------------------- diff --git a/assemble/conf/accumulo-env.sh b/assemble/conf/accumulo-env.sh new file mode 100644 index 0000000..f9e8945 --- /dev/null +++ b/assemble/conf/accumulo-env.sh @@ -0,0 +1,117 @@ +#! /usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +## Before accumulo-env.sh is loaded, these environment variables are set and can be used in this file: + +# cmd - Command that is being called such as tserver, master, etc. +# basedir - Root of Accumulo installation +# bin - Directory containing Accumulo scripts +# conf - Directory containing Accumulo configuration +# lib - Directory containing Accumulo libraries + +############################ +# Variables that must be set +############################ + +## Accumulo logs directory. Referenced by logger config. +export ACCUMULO_LOG_DIR="${ACCUMULO_LOG_DIR:-${basedir}/logs}" +## Hadoop installation +export HADOOP_PREFIX="${HADOOP_PREFIX:-/path/to/hadoop}" +## Hadoop configuration +export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-${HADOOP_PREFIX}/etc/hadoop}" +## Zookeeper installation +export ZOOKEEPER_HOME="${ZOOKEEPER_HOME:-/path/to/zookeeper}" + +########################## +# Build CLASSPATH variable +########################## + +## Adds external Hadoop & Zookeeper dependencies to CLASSPATH. See "Vendor configuration" section of Accumulo user manual +## for different settings if you installed vendor's distribution of Hadoop or Zookeeper. 
+CLASSPATH="$(find "$ZOOKEEPER_HOME"/ "$HADOOP_PREFIX"/share/hadoop/{common,common/lib,hdfs,mapreduce,yarn} -maxdepth 1 -name '*.jar' \ + -and -not -name '*slf4j*' \ + -and -not -name '*fatjar*' \ + -and -not -name '*-javadoc*' \ + -and -not -name '*-sources*.jar' \ + -and -not -name '*-test*.jar' \ + -print0 | tr '\0' ':')$CLASSPATH" +CLASSPATH="${conf}:${lib}/*:${HADOOP_CONF_DIR}:${CLASSPATH}" +export CLASSPATH + +################################################################## +# Build JAVA_OPTS variable. Defaults below work but can be edited. +################################################################## + +## JVM options set for all processes. Extra options can be passed in by setting ACCUMULO_JAVA_OPTS to an array of options. +JAVA_OPTS=("${ACCUMULO_JAVA_OPTS[@]}" + '-XX:+UseConcMarkSweepGC' + '-XX:CMSInitiatingOccupancyFraction=75' + '-XX:+CMSClassUnloadingEnabled' + '-XX:OnOutOfMemoryError=kill -9 %p' + '-XX:-OmitStackTraceInFastThrow' + '-Djava.net.preferIPv4Stack=true' + "-Daccumulo.native.lib.path=${lib}/native") + +## Make sure Accumulo native libraries are built since they are enabled by default +"${bin}"/accumulo-util build-native &> /dev/null + +## JVM options set for individual applications +case "$cmd" in + master) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx512m' '-Xms512m') ;; + monitor) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx256m' '-Xms256m') ;; + gc) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx256m' '-Xms256m') ;; + tserver) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx768m' '-Xms768m') ;; + *) JAVA_OPTS=("${JAVA_OPTS[@]}" '-Xmx256m' '-Xms64m') ;; +esac + +## JVM options set for logging. Review logj4 properties files to see how they are used. 
+JAVA_OPTS=("${JAVA_OPTS[@]}" + "-Daccumulo.log.dir=${ACCUMULO_LOG_DIR}" + "-Daccumulo.application=${cmd}${ACCUMULO_SERVICE_INSTANCE}_$(hostname)") + +case "$cmd" in + monitor) + JAVA_OPTS=("${JAVA_OPTS[@]}" "-Dlog4j.configuration=log4j-monitor.properties") + ;; + gc|master|tserver|tracer) + JAVA_OPTS=("${JAVA_OPTS[@]}" "-Dlog4j.configuration=log4j-service.properties") + ;; + *) + # let log4j use its default behavior (log4j.xml, log4j.properties) + true + ;; +esac + +export JAVA_OPTS + +############################ +# Variables set to a default +############################ + +export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1} +## Add Hadoop native libraries to shared library paths given operating system +case "$(uname)" in + Darwin) export DYLD_LIBRARY_PATH="${HADOOP_PREFIX}/lib/native:${DYLD_LIBRARY_PATH}" ;; + *) export LD_LIBRARY_PATH="${HADOOP_PREFIX}/lib/native:${LD_LIBRARY_PATH}" ;; +esac + +############################################### +# Variables that are optional. Uncomment to set +############################################### + +## Specifies command that will be placed before calls to Java in accumulo script +# export ACCUMULO_JAVA_PREFIX="" http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/assemble/conf/accumulo-site.xml ---------------------------------------------------------------------- diff --git a/assemble/conf/accumulo-site.xml b/assemble/conf/accumulo-site.xml new file mode 100644 index 0000000..a617484 --- /dev/null +++ b/assemble/conf/accumulo-site.xml @@ -0,0 +1,51 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- This is the main configuration file for Apache Accumulo. Available configuration properties + (and their default values) can be found in the user manual (docs/accumulo_user_manual.html). --> +<configuration> + <!-- Set location in HDFS where Accumulo will store data --> + <property> + <name>instance.volumes</name> + <value>hdfs://localhost:8020/accumulo</value> + </property> + <!-- Set location of Zookeepers --> + <property> + <name>instance.zookeeper.host</name> + <value>localhost:2181</value> + </property> + <!-- Change secret before initialization. All servers must have same secret --> + <property> + <name>instance.secret</name> + <value>DEFAULT</value> + </property> + <!-- Set to false if 'accumulo-util build-native' fails --> + <property> + <name>tserver.memory.maps.native.enabled</name> + <value>true</value> + </property> + <!-- Set a correct user/password below --> + <property> + <name>trace.user</name> + <value>root</value> + </property> + <property> + <name>trace.password</name> + <value>secret</value> + </property> +</configuration> http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/assemble/conf/templates/accumulo-env.sh ---------------------------------------------------------------------- diff --git a/assemble/conf/templates/accumulo-env.sh b/assemble/conf/templates/accumulo-env.sh deleted file mode 100644 index 64b8294..0000000 --- a/assemble/conf/templates/accumulo-env.sh +++ /dev/null @@ -1,113 +0,0 @@ -#! 
/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -## Before accumulo-env.sh is loaded, these environment variables are set and can be used in this file: - -# cmd - Command that is being called such as tserver, master, etc. -# basedir - Root of Accumulo installation -# bin - Directory containing Accumulo scripts -# conf - Directory containing Accumulo configuration -# lib - Directory containing Accumulo libraries - -############################ -# Variables that must be set -############################ - -## Accumulo logs directory. Referenced by logger config. -export ACCUMULO_LOG_DIR="${ACCUMULO_LOG_DIR:-${basedir}/logs}" -## Hadoop installation -export HADOOP_PREFIX="${HADOOP_PREFIX:-/path/to/hadoop}" -## Hadoop configuration -export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-${HADOOP_PREFIX}/etc/hadoop}" -## Zookeeper installation -export ZOOKEEPER_HOME="${ZOOKEEPER_HOME:-/path/to/zookeeper}" - -################################################################## -# Build JAVA_OPTS variable. Defaults below work but can be edited. -################################################################## - -## JVM options set for all processes. Extra options can be passed in by setting ACCUMULO_JAVA_OPTS to an array of options. 
-JAVA_OPTS=("${ACCUMULO_JAVA_OPTS[@]}" - '-XX:+UseConcMarkSweepGC' - '-XX:CMSInitiatingOccupancyFraction=75' - '-XX:+CMSClassUnloadingEnabled' - '-XX:OnOutOfMemoryError=kill -9 %p' - '-XX:-OmitStackTraceInFastThrow' - '-Djava.net.preferIPv4Stack=true' - "-Daccumulo.native.lib.path=${lib}/native") - -## Make sure Accumulo native libraries are built since they are enabled by default -"${bin}"/accumulo-util build-native &> /dev/null - -## JVM options set for individual applications -case "$cmd" in - master) JAVA_OPTS=("${JAVA_OPTS[@]}" ${masterHigh_masterLow}) ;; - monitor) JAVA_OPTS=("${JAVA_OPTS[@]}" ${monitorHigh_monitorLow}) ;; - gc) JAVA_OPTS=("${JAVA_OPTS[@]}" ${gcHigh_gcLow}) ;; - tserver) JAVA_OPTS=("${JAVA_OPTS[@]}" ${tServerHigh_tServerLow}) ;; - shell) JAVA_OPTS=("${JAVA_OPTS[@]}" ${shellHigh_shellLow}) ;; - *) JAVA_OPTS=("${JAVA_OPTS[@]}" ${otherHigh_otherLow}) ;; -esac - -## JVM options set for logging. Review logj4 properties files to see how they are used. -JAVA_OPTS=("${JAVA_OPTS[@]}" - "-Daccumulo.log.dir=${ACCUMULO_LOG_DIR}" - "-Daccumulo.application=${cmd}${ACCUMULO_SERVICE_INSTANCE}_$(hostname)") - -case "$cmd" in - monitor) - JAVA_OPTS=("${JAVA_OPTS[@]}" "-Dlog4j.configuration=log4j-monitor.properties") - ;; - gc|master|tserver|tracer) - JAVA_OPTS=("${JAVA_OPTS[@]}" "-Dlog4j.configuration=log4j-service.properties") - ;; - *) - # let log4j use its default behavior (log4j.xml, log4j.properties) - true - ;; -esac - -export JAVA_OPTS - -## External class path items for Java system class loader (dependencies not included with Accumulo) -CLASSPATH="$(find "$ZOOKEEPER_HOME"/{,lib} "$HADOOP_PREFIX"/share/hadoop/{common,common/lib,hdfs,mapreduce,yarn} -maxdepth 1 -name '*.jar' \ - -and -not -name '*slf4j*' \ - -and -not -name '*fatjar*' \ - -and -not -name '*-javadoc*' \ - -and -not -name '*-sources*.jar' \ - -and -not -name '*-test*.jar' \ - -print0 | tr '\0' ':')$CLASSPATH" -CLASSPATH="${HADOOP_CONF_DIR}:${CLASSPATH}" -export CLASSPATH - 
-############################ -# Variables set to a default -############################ - -export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-1} -## Add Hadoop native libraries to shared library paths given operating system -case "$(uname)" in - Darwin) export DYLD_LIBRARY_PATH="${HADOOP_PREFIX}/lib/native:${DYLD_LIBRARY_PATH}" ;; - *) export LD_LIBRARY_PATH="${HADOOP_PREFIX}/lib/native:${LD_LIBRARY_PATH}" ;; -esac - -############################################### -# Variables that are optional. Uncomment to set -############################################### - -## Specifies command that will be placed before calls to Java in accumulo script -# export ACCUMULO_JAVA_PREFIX="" http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/assemble/conf/templates/accumulo-site.xml ---------------------------------------------------------------------- diff --git a/assemble/conf/templates/accumulo-site.xml b/assemble/conf/templates/accumulo-site.xml deleted file mode 100644 index a1f4153..0000000 --- a/assemble/conf/templates/accumulo-site.xml +++ /dev/null @@ -1,175 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---> -<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> - -<configuration> - <!-- This is the main configuration file for Apache Accumulo. Available configuration properties - (and their default values) can be found in the user manual (docs/accumulo_user_manual.html). --> - <property> - <name>instance.volumes</name> - <value></value> - <description>comma separated list of URIs for volumes. example: hdfs://localhost:9000/accumulo</description> - </property> - - <property> - <name>instance.zookeeper.host</name> - <value>localhost:2181</value> - <description>comma separated list of zookeeper servers</description> - </property> - - <property> - <name>instance.secret</name> - <value>DEFAULT</value> - <description>A secret unique to a given instance that all servers must know in order to communicate with one another. - Change it before initialization. To - change it later use ./bin/accumulo org.apache.accumulo.server.util.ChangeSecret --old [oldpasswd] --new [newpasswd], - and then update this file. 
- </description> - </property> - - <property> - <name>tserver.memory.maps.max</name> - <value>${memMapMax}</value> - </property> - - <property> - <name>tserver.memory.maps.native.enabled</name> - <value>${nativeEnabled}</value> - </property> - - <property> - <name>tserver.cache.data.size</name> - <value>${cacheDataSize}</value> - </property> - - <property> - <name>tserver.cache.index.size</name> - <value>${cacheIndexSize}</value> - </property> - - <property> - <name>trace.token.property.password</name> - <!-- change this to the root user's password, and/or change the user below --> - <value>secret</value> - </property> - - <!-- Kerberos requirements --> - <property> - <name>instance.rpc.sasl.enabled</name> - <value>true</value> - </property> - - <property> - <name>general.kerberos.keytab</name> - <value>${keytab}</value> - </property> - - <property> - <name>general.kerberos.principal</name> - <value>${principal}</value> - </property> - - <property> - <name>trace.token.type</name> - <value>org.apache.accumulo.core.client.security.tokens.KerberosToken</value> - </property> - - <property> - <name>instance.security.authenticator</name> - <value>org.apache.accumulo.server.security.handler.KerberosAuthenticator</value> - </property> - - <property> - <name>instance.security.authorizor</name> - <value>org.apache.accumulo.server.security.handler.KerberosAuthorizor</value> - </property> - - <property> - <name>instance.security.permissionHandler</name> - <value>org.apache.accumulo.server.security.handler.KerberosPermissionHandler</value> - </property> - <!-- End Kerberos requirements --> - - <property> - <name>trace.user</name> - <value>${traceUser}</value> - </property> - - <property> - <name>tserver.sort.buffer.size</name> - <value>${sortBufferSize}</value> - </property> - - <property> - <name>tserver.walog.max.size</name> - <value>${waLogMaxSize}</value> - </property> -${mvnProjBaseDir} - <property> - <name>general.classpaths</name> - - <value> - <!-- Accumulo requirements 
--> - $ACCUMULO_HOME/lib/accumulo-server.jar, - $ACCUMULO_HOME/lib/accumulo-core.jar, - $ACCUMULO_HOME/lib/accumulo-start.jar, - $ACCUMULO_HOME/lib/accumulo-fate.jar, - $ACCUMULO_HOME/lib/accumulo-proxy.jar, - $ACCUMULO_HOME/lib/[^.].*.jar, - <!-- ZooKeeper requirements --> - $ZOOKEEPER_HOME/zookeeper[^.].*.jar, - <!-- Common Hadoop requirements --> - $HADOOP_CONF_DIR, - <!-- Hadoop 2 requirements --> - $HADOOP_PREFIX/share/hadoop/common/[^.].*.jar, - $HADOOP_PREFIX/share/hadoop/common/lib/(?!slf4j)[^.].*.jar, - $HADOOP_PREFIX/share/hadoop/hdfs/[^.].*.jar, - $HADOOP_PREFIX/share/hadoop/mapreduce/[^.].*.jar, - $HADOOP_PREFIX/share/hadoop/yarn/[^.].*.jar, - $HADOOP_PREFIX/share/hadoop/yarn/lib/jersey.*.jar, - <!-- End Hadoop 2 requirements --> - <!-- HDP 2.0 requirements --> - /usr/lib/hadoop/[^.].*.jar, - /usr/lib/hadoop/lib/[^.].*.jar, - /usr/lib/hadoop-hdfs/[^.].*.jar, - /usr/lib/hadoop-mapreduce/[^.].*.jar, - /usr/lib/hadoop-yarn/[^.].*.jar, - /usr/lib/hadoop-yarn/lib/jersey.*.jar, - <!-- End HDP 2.0 requirements --> - <!-- HDP 2.2 requirements --> - /usr/hdp/current/hadoop-client/[^.].*.jar, - /usr/hdp/current/hadoop-client/lib/(?!slf4j)[^.].*.jar, - /usr/hdp/current/hadoop-hdfs-client/[^.].*.jar, - /usr/hdp/current/hadoop-mapreduce-client/[^.].*.jar, - /usr/hdp/current/hadoop-yarn-client/[^.].*.jar, - /usr/hdp/current/hadoop-yarn-client/lib/jersey.*.jar, - /usr/hdp/current/hive-client/lib/hive-accumulo-handler.jar - <!-- End HDP 2.2 requirements --> - <!-- IOP 4.1 requirements --> - /usr/iop/current/hadoop-client/[^.].*.jar, - /usr/iop/current/hadoop-client/lib/(?!slf4j)[^.].*.jar, - /usr/iop/current/hadoop-hdfs-client/[^.].*.jar, - /usr/iop/current/hadoop-mapreduce-client/[^.].*.jar, - /usr/iop/current/hadoop-yarn-client/[^.].*.jar, - /usr/iop/current/hadoop-yarn-client/lib/jersey.*.jar, - /usr/iop/current/hive-client/lib/hive-accumulo-handler.jar - <!-- End IOP 4.1 requirements --> - </value> - <description>Classpaths that accumulo checks for updates and 
class files.</description> - </property> -</configuration> http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/assemble/pom.xml ---------------------------------------------------------------------- diff --git a/assemble/pom.xml b/assemble/pom.xml index ceef079..c2fdb69 100644 --- a/assemble/pom.xml +++ b/assemble/pom.xml @@ -243,16 +243,6 @@ <executable>${basedir}/src/main/scripts/generate-versions-listing.sh</executable> </configuration> </execution> - <execution> - <id>generate-example-configs</id> - <goals> - <goal>exec</goal> - </goals> - <phase>generate-resources</phase> - <configuration> - <executable>${basedir}/src/main/scripts/generate-example-configs.sh</executable> - </configuration> - </execution> </executions> </plugin> <plugin> http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/assemble/src/main/scripts/generate-example-configs.sh ---------------------------------------------------------------------- diff --git a/assemble/src/main/scripts/generate-example-configs.sh b/assemble/src/main/scripts/generate-example-configs.sh deleted file mode 100755 index facf927..0000000 --- a/assemble/src/main/scripts/generate-example-configs.sh +++ /dev/null @@ -1,23 +0,0 @@ -#! /usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# This script will regenerate the example configuration files for the tarball - -out=target/config.out - -echo 'Generating example scripts...' > $out -bin/accumulo-util create-config -o -d target/example-configs -s 2GB -j -v 2 >> $out 2>&1 http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/core/src/main/java/org/apache/accumulo/core/cli/ClientOpts.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/cli/ClientOpts.java b/core/src/main/java/org/apache/accumulo/core/cli/ClientOpts.java index 57f8774..9354f18 100644 --- a/core/src/main/java/org/apache/accumulo/core/cli/ClientOpts.java +++ b/core/src/main/java/org/apache/accumulo/core/cli/ClientOpts.java @@ -72,7 +72,7 @@ public class ClientOpts extends Help { public static class MemoryConverter implements IStringConverter<Long> { @Override public Long convert(String value) { - return AccumuloConfiguration.getMemoryInBytes(value); + return AccumuloConfiguration.getFixedMemoryAsBytes(value); } } http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScanner.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScanner.java b/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScanner.java index 186471d..1b12fb6 100644 --- a/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScanner.java +++ b/core/src/main/java/org/apache/accumulo/core/client/rfile/RFileScanner.java @@ -73,7 +73,7 @@ class RFileScanner extends ScannerOptions implements Scanner { private int batchSize = 1000; private long readaheadThreshold = 3; - private static final long CACHE_BLOCK_SIZE = 
AccumuloConfiguration.getDefaultConfiguration().getMemoryInBytes(Property.TSERV_DEFAULT_BLOCKSIZE); + private static final long CACHE_BLOCK_SIZE = AccumuloConfiguration.getDefaultConfiguration().getAsBytes(Property.TSERV_DEFAULT_BLOCKSIZE); static class Opts { InputArgs in; http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/core/src/main/java/org/apache/accumulo/core/compaction/SizeType.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/compaction/SizeType.java b/core/src/main/java/org/apache/accumulo/core/compaction/SizeType.java index cf147a8..ec53f87 100644 --- a/core/src/main/java/org/apache/accumulo/core/compaction/SizeType.java +++ b/core/src/main/java/org/apache/accumulo/core/compaction/SizeType.java @@ -23,7 +23,7 @@ import org.apache.accumulo.core.conf.AccumuloConfiguration; class SizeType implements Type { @Override public String convert(String str) { - long size = AccumuloConfiguration.getMemoryInBytes(str); + long size = AccumuloConfiguration.getFixedMemoryAsBytes(str); checkArgument(size > 0); return Long.toString(size); } http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/core/src/main/java/org/apache/accumulo/core/conf/AccumuloConfiguration.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/conf/AccumuloConfiguration.java b/core/src/main/java/org/apache/accumulo/core/conf/AccumuloConfiguration.java index 593f466..363a66c 100644 --- a/core/src/main/java/org/apache/accumulo/core/conf/AccumuloConfiguration.java +++ b/core/src/main/java/org/apache/accumulo/core/conf/AccumuloConfiguration.java @@ -165,30 +165,32 @@ public abstract class AccumuloConfiguration implements Iterable<Entry<String,Str } /** - * Gets a property of type {@link PropertyType#MEMORY}, interpreting the value properly. 
+ * Gets a property of type {@link PropertyType#BYTES} or {@link PropertyType#MEMORY}, interpreting + * the value properly. * - * @param property - * property to get + * @param property Property to get * @return property value - * @throws IllegalArgumentException - * if the property is of the wrong type - * @see #getMemoryInBytes(String) + * @throws IllegalArgumentException if the property is of the wrong type */ - public long getMemoryInBytes(Property property) { - checkType(property, PropertyType.MEMORY); - + public long getAsBytes(Property property) { String memString = get(property); - return getMemoryInBytes(memString); + if (property.getType() == PropertyType.MEMORY) { + return getMemoryAsBytes(memString); + } else if (property.getType() == PropertyType.BYTES) { + return getFixedMemoryAsBytes(memString); + } else { + throw new IllegalArgumentException(property.getKey() + " is not of BYTES or MEMORY type"); + } } /** - * Interprets a string specifying a memory size. A memory size is specified as a long integer followed by an optional B (bytes), K (KB), M (MB), or G (GB). + * Interprets a string specifying bytes. A bytes type is specified as a long integer followed by an optional B (bytes), K (KB), M (MB), or G (GB). * * @param str - * string value - * @return interpreted memory size + * String value + * @return interpreted memory size in bytes */ - static public long getMemoryInBytes(String str) { + static public long getFixedMemoryAsBytes(String str) { char lastChar = str.charAt(str.length() - 1); if (lastChar == 'b') { @@ -215,8 +217,32 @@ public abstract class AccumuloConfiguration implements Iterable<Entry<String,Str return Long.parseLong(str.substring(0, str.length() - 1)) << multiplier; } catch (Exception ex) { throw new IllegalArgumentException("The value '" + str + "' is not a valid memory setting. 
A valid value would a number " - + "possibily followed by an optional 'G', 'M', 'K', or 'B'."); + + "possibly followed by an optional 'G', 'M', 'K', or 'B'."); + } + } + + /** + * Interprets a string specifying a Memory type which is specified as a long integer followed by an optional B (bytes), K (KB), M (MB), G (GB) or % + * (percentage). + * + * @param str + * String value + * @return interpreted memory size in bytes + */ + static public long getMemoryAsBytes(String str) { + char lastChar = str.charAt(str.length() - 1); + if (lastChar == '%') { + try { + int percent = Integer.parseInt(str.substring(0, str.length() - 1)); + if (percent <= 0 || percent >= 100) { + throw new IllegalArgumentException("The value '" + str + "' is not a valid memory setting."); + } + return Runtime.getRuntime().maxMemory() * percent / 100; + } catch (Exception ex) { + throw new IllegalArgumentException("The value '" + str + "' is not a valid memory setting."); + } } + return getFixedMemoryAsBytes(str); } /** http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/core/src/main/java/org/apache/accumulo/core/conf/Property.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/conf/Property.java b/core/src/main/java/org/apache/accumulo/core/conf/Property.java index 7298db1..4472a54 100644 --- a/core/src/main/java/org/apache/accumulo/core/conf/Property.java +++ b/core/src/main/java/org/apache/accumulo/core/conf/Property.java @@ -54,7 +54,7 @@ public enum Property { CRYPTO_CIPHER_ALGORITHM_NAME("crypto.cipher.algorithm.name", "NullCipher", PropertyType.STRING, "States the name of the algorithm used in the corresponding cipher suite. 
Do not make these different, unless you enjoy mysterious exceptions and bugs."), @Experimental - CRYPTO_BLOCK_STREAM_SIZE("crypto.block.stream.size", "1K", PropertyType.MEMORY, + CRYPTO_BLOCK_STREAM_SIZE("crypto.block.stream.size", "1K", PropertyType.BYTES, "The size of the buffer above the cipher stream. Used for reading files and padding walog entries."), @Experimental CRYPTO_CIPHER_KEY_LENGTH("crypto.cipher.key.length", "128", PropertyType.STRING, @@ -170,10 +170,13 @@ public enum Property { // general properties GENERAL_PREFIX("general.", null, PropertyType.PREFIX, "Properties in this category affect the behavior of accumulo overall, but do not have to be consistent throughout a cloud."), - GENERAL_CLASSPATHS(AccumuloClassLoader.CLASSPATH_PROPERTY_NAME, AccumuloClassLoader.ACCUMULO_CLASSPATH_VALUE, PropertyType.STRING, - "A list of all of the places to look for a class. Order does matter, as it will look for the jar " - + "starting in the first location to the last. Please note, hadoop conf and hadoop lib directories NEED to be here, " - + "along with accumulo lib and zookeeper directory. Supports full regex on filename alone."), // needs special treatment in accumulo start jar + @Deprecated + GENERAL_CLASSPATHS( + AccumuloClassLoader.CLASSPATH_PROPERTY_NAME, + "", + PropertyType.STRING, + "This property is deprecated as classpath should be configured accumulo-env.sh. A list of all of the places to look for a class. Order does matter, as it will look for the jar " + + "starting in the first location to the last. 
Supports full regex on filename alone."), GENERAL_DYNAMIC_CLASSPATHS(AccumuloVFSClassLoader.DYNAMIC_CLASSPATH_PROPERTY_NAME, AccumuloVFSClassLoader.DEFAULT_DYNAMIC_CLASSPATH_VALUE, PropertyType.STRING, "A list of all of the places where changes in jars or classes will force a reload of the classloader."), GENERAL_RPC_TIMEOUT("general.rpc.timeout", "120s", PropertyType.TIMEDURATION, "Time to wait on I/O for simple, short RPC calls"), @@ -185,7 +188,7 @@ public enum Property { + "replaced by the machines hostname in the hostname portion of the principal. Leave blank if not using kerberoized hdfs"), GENERAL_KERBEROS_RENEWAL_PERIOD("general.kerberos.renewal.period", "30s", PropertyType.TIMEDURATION, "The amount of time between attempts to perform " + "Kerberos ticket renewals. This does not equate to how often tickets are actually renewed (which is performed at 80% of the ticket lifetime)."), - GENERAL_MAX_MESSAGE_SIZE("general.server.message.size.max", "1G", PropertyType.MEMORY, "The maximum size of a message that can be sent to a server."), + GENERAL_MAX_MESSAGE_SIZE("general.server.message.size.max", "1G", PropertyType.BYTES, "The maximum size of a message that can be sent to a server."), GENERAL_SIMPLETIMER_THREADPOOL_SIZE("general.server.simpletimer.threadpool.size", "1", PropertyType.COUNT, "The number of threads to use for " + "server-internal scheduled tasks"), // If you update the default type, be sure to update the default used for initialization failures in VolumeManagerImpl @@ -240,26 +243,26 @@ public enum Property { // properties that are specific to tablet server behavior TSERV_PREFIX("tserver.", null, PropertyType.PREFIX, "Properties in this category affect the behavior of the tablet servers"), TSERV_CLIENT_TIMEOUT("tserver.client.timeout", "3s", PropertyType.TIMEDURATION, "Time to wait for clients to continue scans before closing a session."), - TSERV_DEFAULT_BLOCKSIZE("tserver.default.blocksize", "1M", PropertyType.MEMORY, "Specifies a default 
blocksize for the tserver caches"), + TSERV_DEFAULT_BLOCKSIZE("tserver.default.blocksize", "1M", PropertyType.BYTES, "Specifies a default blocksize for the tserver caches"), TSERV_CACHE_POLICY("tserver.cache.policy", "LRU", PropertyType.STRING, "Specifies the eviction policy of the file data caches (LRU or TinyLFU)."), - TSERV_DATACACHE_SIZE("tserver.cache.data.size", "128M", PropertyType.MEMORY, "Specifies the size of the cache for file data blocks."), - TSERV_INDEXCACHE_SIZE("tserver.cache.index.size", "512M", PropertyType.MEMORY, "Specifies the size of the cache for file indices."), - TSERV_SUMMARYCACHE_SIZE("tserver.cache.summary.size", "128M", PropertyType.MEMORY, "Specifies the size of the cache for summary data on each tablet server."), + TSERV_DATACACHE_SIZE("tserver.cache.data.size", "10%", PropertyType.MEMORY, "Specifies the size of the cache for file data blocks."), + TSERV_INDEXCACHE_SIZE("tserver.cache.index.size", "25%", PropertyType.MEMORY, "Specifies the size of the cache for file indices."), + TSERV_SUMMARYCACHE_SIZE("tserver.cache.summary.size", "10%", PropertyType.MEMORY, "Specifies the size of the cache for summary data on each tablet server."), TSERV_PORTSEARCH("tserver.port.search", "false", PropertyType.BOOLEAN, "if the ports above are in use, search higher ports until one is available"), TSERV_CLIENTPORT("tserver.port.client", "9997", PropertyType.PORT, "The port used for handling client connections on the tablet servers"), @Deprecated - TSERV_MUTATION_QUEUE_MAX("tserver.mutation.queue.max", "1M", PropertyType.MEMORY, "This setting is deprecated. See tserver.total.mutation.queue.max. " + TSERV_MUTATION_QUEUE_MAX("tserver.mutation.queue.max", "1M", PropertyType.BYTES, "This setting is deprecated. See tserver.total.mutation.queue.max. " + "The amount of memory to use to store write-ahead-log mutations-per-session before flushing them. Since the buffer is per write session, consider the" + " max number of concurrent writer when configuring. 
When using Hadoop 2, Accumulo will call hsync() on the WAL . For a small number of " + "concurrent writers, increasing this buffer size decreases the frequncy of hsync calls. For a large number of concurrent writers a small buffers " + "size is ok because of group commit."), - TSERV_TOTAL_MUTATION_QUEUE_MAX("tserver.total.mutation.queue.max", "50M", PropertyType.MEMORY, + TSERV_TOTAL_MUTATION_QUEUE_MAX("tserver.total.mutation.queue.max", "5%", PropertyType.MEMORY, "The amount of memory used to store write-ahead-log mutations before flushing them."), TSERV_TABLET_SPLIT_FINDMIDPOINT_MAXOPEN("tserver.tablet.split.midpoint.files.max", "300", PropertyType.COUNT, "To find a tablets split points, all index files are opened. This setting determines how many index " + "files can be opened at once. When there are more index files than this setting multiple passes " + "must be made, which is slower. However opening too many files at once can cause problems."), - TSERV_WALOG_MAX_SIZE("tserver.walog.max.size", "1G", PropertyType.MEMORY, + TSERV_WALOG_MAX_SIZE("tserver.walog.max.size", "1g", PropertyType.BYTES, "The maximum size for each write-ahead log. 
See comment for property tserver.memory.maps.max"), TSERV_WALOG_MAX_AGE("tserver.walog.max.age", "24h", PropertyType.TIMEDURATION, "The maximum age for each write-ahead log."), TSERV_WALOG_TOLERATED_CREATION_FAILURES("tserver.walog.tolerated.creation.failures", "50", PropertyType.COUNT, @@ -280,7 +283,7 @@ public enum Property { + "This setting determines how much time an unused file should be kept open until it is closed."), TSERV_NATIVEMAP_ENABLED("tserver.memory.maps.native.enabled", "true", PropertyType.BOOLEAN, "An in-memory data store for accumulo implemented in c++ that increases the amount of data accumulo can hold in memory and avoids Java GC pauses."), - TSERV_MAXMEM("tserver.memory.maps.max", "1G", PropertyType.MEMORY, + TSERV_MAXMEM("tserver.memory.maps.max", "33%", PropertyType.MEMORY, "Maximum amount of memory that can be used to buffer data written to a tablet server. There are two other properties that can effectively limit memory" + " usage table.compaction.minor.logs.threshold and tserver.walog.max.size. 
Ensure that table.compaction.minor.logs.threshold *" + " tserver.walog.max.size >= this property."), @@ -300,7 +303,7 @@ public enum Property { "The maximum number of concurrent tablet migrations for a tablet server"), TSERV_MAJC_MAXCONCURRENT("tserver.compaction.major.concurrent.max", "3", PropertyType.COUNT, "The maximum number of concurrent major compactions for a tablet server"), - TSERV_MAJC_THROUGHPUT("tserver.compaction.major.throughput", "0B", PropertyType.MEMORY, + TSERV_MAJC_THROUGHPUT("tserver.compaction.major.throughput", "0B", PropertyType.BYTES, "Maximum number of bytes to read or write per second over all major compactions on a TabletServer, or 0B for unlimited."), TSERV_MINC_MAXCONCURRENT("tserver.compaction.minor.concurrent.max", "4", PropertyType.COUNT, "The maximum number of concurrent minor compactions for a tablet server"), @@ -329,18 +332,18 @@ public enum Property { TSERV_BULK_TIMEOUT("tserver.bulk.timeout", "5m", PropertyType.TIMEDURATION, "The time to wait for a tablet server to process a bulk import request."), TSERV_MINTHREADS("tserver.server.threads.minimum", "20", PropertyType.COUNT, "The minimum number of threads to use to handle incoming requests."), TSERV_THREADCHECK("tserver.server.threadcheck.time", "1s", PropertyType.TIMEDURATION, "The time between adjustments of the server thread pool."), - TSERV_MAX_MESSAGE_SIZE("tserver.server.message.size.max", "1G", PropertyType.MEMORY, "The maximum size of a message that can be sent to a tablet server."), + TSERV_MAX_MESSAGE_SIZE("tserver.server.message.size.max", "1G", PropertyType.BYTES, "The maximum size of a message that can be sent to a tablet server."), TSERV_HOLD_TIME_SUICIDE("tserver.hold.time.max", "5m", PropertyType.TIMEDURATION, "The maximum time for a tablet server to be in the \"memory full\" state. If the tablet server cannot write out memory" + " in this much time, it will assume there is some failure local to its node, and quit. 
A value of zero is equivalent to forever."), - TSERV_WAL_BLOCKSIZE("tserver.wal.blocksize", "0", PropertyType.MEMORY, + TSERV_WAL_BLOCKSIZE("tserver.wal.blocksize", "0", PropertyType.BYTES, "The size of the HDFS blocks used to write to the Write-Ahead log. If zero, it will be 110% of tserver.walog.max.size (that is, try to use just one" + " block)"), TSERV_WAL_REPLICATION("tserver.wal.replication", "0", PropertyType.COUNT, "The replication to use when writing the Write-Ahead log to HDFS. If zero, it will use the HDFS default replication setting."), TSERV_RECOVERY_MAX_CONCURRENT("tserver.recovery.concurrent.max", "2", PropertyType.COUNT, "The maximum number of threads to use to sort logs during" + " recovery"), - TSERV_SORT_BUFFER_SIZE("tserver.sort.buffer.size", "200M", PropertyType.MEMORY, "The amount of memory to use when sorting logs during recovery."), + TSERV_SORT_BUFFER_SIZE("tserver.sort.buffer.size", "10%", PropertyType.MEMORY, "The amount of memory to use when sorting logs during recovery."), TSERV_ARCHIVE_WALOGS("tserver.archive.walogs", "false", PropertyType.BOOLEAN, "Keep copies of the WALOGs for debugging purposes"), TSERV_WORKQ_THREADS("tserver.workq.threads", "2", PropertyType.COUNT, "The number of threads for the distributed work queue. 
These threads are used for copying failed bulk files."), @@ -354,7 +357,7 @@ public enum Property { "Allows configuration of implementation used to apply replicated data"), TSERV_REPLICATION_DEFAULT_HANDLER("tserver.replication.default.replayer", "org.apache.accumulo.tserver.replication.BatchWriterReplicationReplayer", PropertyType.CLASSNAME, "Default AccumuloReplicationReplayer implementation"), - TSERV_REPLICATION_BW_REPLAYER_MEMORY("tserver.replication.batchwriter.replayer.memory", "50M", PropertyType.MEMORY, + TSERV_REPLICATION_BW_REPLAYER_MEMORY("tserver.replication.batchwriter.replayer.memory", "50M", PropertyType.BYTES, "Memory to provide to batchwriter to replay mutations for replication"), TSERV_ASSIGNMENT_MAXCONCURRENT("tserver.assignment.concurrent.max", "2", PropertyType.COUNT, "The number of threads available to load tablets. Recoveries are still performed serially."), @@ -420,7 +423,7 @@ public enum Property { TRACE_PASSWORD("trace.password", "secret", PropertyType.STRING, "The password for the user used to store distributed traces"), @Sensitive TRACE_TOKEN_PROPERTY_PREFIX("trace.token.property.", null, PropertyType.PREFIX, - "The prefix used to create a token for storing distributed traces. For each propetry required by trace.token.type, place this prefix in front of it."), + "The prefix used to create a token for storing distributed traces. For each property required by trace.token.type, place this prefix in front of it."), TRACE_TOKEN_TYPE("trace.token.type", PasswordToken.class.getName(), PropertyType.CLASSNAME, "An AuthenticationToken type supported by the authorizer"), // per table properties @@ -440,29 +443,29 @@ public enum Property { + "of its files compacted into one. There is no guarantee an idle tablet will be compacted. " + "Compactions of idle tablets are only started when regular compactions are not running. 
Idle " + "compactions only take place for tablets that have one or more files."), - TABLE_SPLIT_THRESHOLD("table.split.threshold", "1G", PropertyType.MEMORY, "When combined size of files exceeds this amount a tablet is split."), - TABLE_MAX_END_ROW_SIZE("table.split.endrow.size.max", "10K", PropertyType.MEMORY, "Maximum size of end row"), + TABLE_SPLIT_THRESHOLD("table.split.threshold", "1G", PropertyType.BYTES, "When combined size of files exceeds this amount a tablet is split."), + TABLE_MAX_END_ROW_SIZE("table.split.endrow.size.max", "10K", PropertyType.BYTES, "Maximum size of end row"), TABLE_MINC_LOGS_MAX("table.compaction.minor.logs.threshold", "3", PropertyType.COUNT, "When there are more than this many write-ahead logs against a tablet, it will be minor compacted. See comment for property tserver.memory.maps.max"), TABLE_MINC_COMPACT_IDLETIME("table.compaction.minor.idle", "5m", PropertyType.TIMEDURATION, "After a tablet has been idle (no mutations) for this time period it may have its " + "in-memory map flushed to disk in a minor compaction. There is no guarantee an idle " + "tablet will be compacted."), - TABLE_MINC_MAX_MERGE_FILE_SIZE("table.compaction.minor.merge.file.size.max", "0", PropertyType.MEMORY, + TABLE_MINC_MAX_MERGE_FILE_SIZE("table.compaction.minor.merge.file.size.max", "0", PropertyType.BYTES, "The max file size used for a merging minor compaction. The default value of 0 disables a max file size."), - TABLE_SCAN_MAXMEM("table.scan.max.memory", "512K", PropertyType.MEMORY, + TABLE_SCAN_MAXMEM("table.scan.max.memory", "512K", PropertyType.BYTES, "The maximum amount of memory that will be used to cache results of a client query/scan. 
" + "Once this limit is reached, the buffered data is sent to the client."), TABLE_FILE_TYPE("table.file.type", RFile.EXTENSION, PropertyType.STRING, "Change the type of file a table writes"), TABLE_LOAD_BALANCER("table.balancer", "org.apache.accumulo.server.master.balancer.DefaultLoadBalancer", PropertyType.STRING, "This property can be set to allow the LoadBalanceByTable load balancer to change the called Load Balancer for this table"), TABLE_FILE_COMPRESSION_TYPE("table.file.compress.type", "gz", PropertyType.STRING, "One of gz,lzo,none"), - TABLE_FILE_COMPRESSED_BLOCK_SIZE("table.file.compress.blocksize", "100K", PropertyType.MEMORY, + TABLE_FILE_COMPRESSED_BLOCK_SIZE("table.file.compress.blocksize", "100K", PropertyType.BYTES, "Similar to the hadoop io.seqfile.compress.blocksize setting, so that files have better query performance. The maximum value for this is " + Integer.MAX_VALUE + ". (This setting is the size threshold prior to compression, and applies even if compression is disabled.)"), - TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX("table.file.compress.blocksize.index", "128K", PropertyType.MEMORY, + TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX("table.file.compress.blocksize.index", "128K", PropertyType.BYTES, "Determines how large index blocks can be in files that support multilevel indexes. The maximum value for this is " + Integer.MAX_VALUE + "." + " (This setting is the size threshold prior to compression, and applies even if compression is disabled.)"), - TABLE_FILE_BLOCK_SIZE("table.file.blocksize", "0B", PropertyType.MEMORY, + TABLE_FILE_BLOCK_SIZE("table.file.blocksize", "0B", PropertyType.BYTES, "Overrides the hadoop dfs.block.size setting so that files have better query performance. The maximum value for this is " + Integer.MAX_VALUE), TABLE_FILE_REPLICATION("table.file.replication", "0", PropertyType.COUNT, "Determines how many replicas to keep of a table's files in HDFS. 
" + "When this value is LTE 0, HDFS defaults are used."), @@ -470,7 +473,7 @@ public enum Property { "Determines the max # of files each tablet in a table can have. When adjusting this property you may want to consider adjusting" + " table.compaction.major.ratio also. Setting this property to 0 will make it default to tserver.scan.files.open.max-1, this will prevent a" + " tablet from having more files than can be opened. Setting this property low may throttle ingest and increase query performance."), - TABLE_FILE_SUMMARY_MAX_SIZE("table.file.summary.maxSize", "256K", PropertyType.MEMORY, "The maximum size summary that will be stored. The number of" + TABLE_FILE_SUMMARY_MAX_SIZE("table.file.summary.maxSize", "256K", PropertyType.BYTES, "The maximum size summary that will be stored. The number of" + " files that had summary data exceeding this threshold is reported by Summary.getFileStatistics().getLarge(). When adjusting this" + " consider the expected number files with summaries on each tablet server and the summary cache size."), @Deprecated @@ -607,7 +610,7 @@ public enum Property { REPLICATION_MIN_THREADS("replication.receiver.min.threads", "1", PropertyType.COUNT, "Minimum number of threads for replication"), REPLICATION_THREADCHECK("replication.receiver.threadcheck.time", "30s", PropertyType.TIMEDURATION, "The time between adjustments of the replication thread pool."), - REPLICATION_MAX_UNIT_SIZE("replication.max.unit.size", "64M", PropertyType.MEMORY, "Maximum size of data to send in a replication message"), + REPLICATION_MAX_UNIT_SIZE("replication.max.unit.size", "64M", PropertyType.BYTES, "Maximum size of data to send in a replication message"), REPLICATION_WORK_ASSIGNER("replication.work.assigner", "org.apache.accumulo.master.replication.UnorderedWorkAssigner", PropertyType.CLASSNAME, "Replication WorkAssigner implementation to use"), REPLICATION_DRIVER_DELAY("replication.driver.delay", "0s", PropertyType.TIMEDURATION, 
http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java b/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java index 9726090..927ecc9 100644 --- a/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java +++ b/core/src/main/java/org/apache/accumulo/core/conf/PropertyType.java @@ -45,10 +45,18 @@ public enum PropertyType { + "Examples of invalid durations are '1w', '1h30m', '1s 200ms', 'ms', '', and 'a'.\n" + "Unless otherwise stated, the max value for the duration represented in milliseconds is " + Long.MAX_VALUE), - MEMORY("memory", boundedUnits(0, Long.MAX_VALUE, false, "", "B", "K", "M", "G"), - "A positive integer optionally followed by a unit of memory (whitespace disallowed), as in 2G.\n" - + "If no unit is specified, bytes are assumed. Valid units are 'B', 'K', 'M', 'G', for bytes, kilobytes, megabytes, and gigabytes.\n" - + "Examples of valid memories are '1024', '20B', '100K', '1500M', '2G'.\n" + BYTES("bytes", boundedUnits(0, Long.MAX_VALUE, false, "", "B", "K", "M", "G"), + "A positive integer optionally followed by a unit of memory (whitespace disallowed).\n" + + "If no unit is specified, bytes are assumed. 
Valid units are 'B', 'K', 'M' or 'G' for bytes, kilobytes, megabytes, gigabytes.\n" + + "Examples of valid memories are '1024', '20B', '100K', '1500M', '2G'.\n" + + "Examples of invalid memories are '1M500K', '1M 2K', '1MB', '1.5G', '1,024K', '20%', '', and 'a'.\n" + + "Unless otherwise stated, the max value for the memory represented in bytes is " + Long.MAX_VALUE), + + MEMORY("memory", boundedUnits(0, Long.MAX_VALUE, false, "", "B", "K", "M", "G", "%"), + "A positive integer optionally followed by a unit of memory or a percentage (whitespace disallowed).\n" + + "If a percentage is specified, memory will be a percentage of the max memory allocated to a Java process (set by the JVM option -Xmx).\n" + + "If no unit is specified, bytes are assumed. Valid units are 'B', 'K', 'M', 'G', '%' for bytes, kilobytes, megabytes, gigabytes, and percentage.\n" + + "Examples of valid memories are '1024', '20B', '100K', '1500M', '2G', '20%'.\n" + "Examples of invalid memories are '1M500K', '1M 2K', '1MB', '1.5G', '1,024K', '', and 'a'.\n" + "Unless otherwise stated, the max value for the memory represented in bytes is " + Long.MAX_VALUE), http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java index 26343ba..16db01d 100644 --- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java +++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFile.java @@ -518,7 +518,7 @@ public class RFile { private Sampler sampler; public Writer(BlockFileWriter bfw, int blockSize) throws IOException { - this(bfw, blockSize, (int) AccumuloConfiguration.getDefaultConfiguration().getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX), null, null); + this(bfw, blockSize, (int) 
AccumuloConfiguration.getDefaultConfiguration().getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX), null, null); } public Writer(BlockFileWriter bfw, int blockSize, int indexBlockSize, SamplerConfigurationImpl samplerConfig, Sampler sampler) throws IOException { http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java index ec721ba..4d1af7e 100644 --- a/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java +++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/RFileOperations.java @@ -81,8 +81,8 @@ public class RFileOperations extends FileOperations { AccumuloConfiguration acuconf = options.getTableConfiguration(); - long blockSize = acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE); - long indexBlockSize = acuconf.getMemoryInBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX); + long blockSize = acuconf.getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE); + long indexBlockSize = acuconf.getAsBytes(Property.TABLE_FILE_COMPRESSED_BLOCK_SIZE_INDEX); SamplerConfigurationImpl samplerConfig = SamplerConfigurationImpl.newSamplerConfig(acuconf); Sampler sampler = null; @@ -106,7 +106,7 @@ public class RFileOperations extends FileOperations { rep = trep; } long hblock = conf.getLong("dfs.block.size", 1 << 26); - long tblock = acuconf.getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE); + long tblock = acuconf.getAsBytes(Property.TABLE_FILE_BLOCK_SIZE); long block = hblock; if (tblock > 0) block = tblock; http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/core/src/main/java/org/apache/accumulo/core/file/rfile/SplitLarge.java ---------------------------------------------------------------------- 
diff --git a/core/src/main/java/org/apache/accumulo/core/file/rfile/SplitLarge.java b/core/src/main/java/org/apache/accumulo/core/file/rfile/SplitLarge.java index a3a4193..1f414d5 100644 --- a/core/src/main/java/org/apache/accumulo/core/file/rfile/SplitLarge.java +++ b/core/src/main/java/org/apache/accumulo/core/file/rfile/SplitLarge.java @@ -68,7 +68,7 @@ public class SplitLarge { String smallName = file.substring(0, file.length() - 3) + "_small.rf"; String largeName = file.substring(0, file.length() - 3) + "_large.rf"; - int blockSize = (int) aconf.getMemoryInBytes(Property.TABLE_FILE_BLOCK_SIZE); + int blockSize = (int) aconf.getAsBytes(Property.TABLE_FILE_BLOCK_SIZE); try (Writer small = new RFile.Writer(new CachableBlockFile.Writer(fs, new Path(smallName), "gz", null, conf, aconf), blockSize); Writer large = new RFile.Writer(new CachableBlockFile.Writer(fs, new Path(largeName), "gz", null, conf, aconf), blockSize)) { small.startDefaultLocalityGroup(); http://git-wip-us.apache.org/repos/asf/accumulo/blob/f159ec1f/core/src/main/java/org/apache/accumulo/core/iterators/user/RowEncodingIterator.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/accumulo/core/iterators/user/RowEncodingIterator.java b/core/src/main/java/org/apache/accumulo/core/iterators/user/RowEncodingIterator.java index e0fd64e..150d1fa 100644 --- a/core/src/main/java/org/apache/accumulo/core/iterators/user/RowEncodingIterator.java +++ b/core/src/main/java/org/apache/accumulo/core/iterators/user/RowEncodingIterator.java @@ -155,7 +155,7 @@ public abstract class RowEncodingIterator implements SortedKeyValueIterator<Key, public void init(SortedKeyValueIterator<Key,Value> source, Map<String,String> options, IteratorEnvironment env) throws IOException { sourceIter = source; if (options.containsKey(MAX_BUFFER_SIZE_OPT)) { - maxBufferSize = AccumuloConfiguration.getMemoryInBytes(options.get(MAX_BUFFER_SIZE_OPT)); + maxBufferSize = 
AccumuloConfiguration.getFixedMemoryAsBytes(options.get(MAX_BUFFER_SIZE_OPT)); } } @@ -172,7 +172,7 @@ public abstract class RowEncodingIterator implements SortedKeyValueIterator<Key, public boolean validateOptions(Map<String,String> options) { String maxBufferSizeStr = options.get(MAX_BUFFER_SIZE_OPT); try { - AccumuloConfiguration.getMemoryInBytes(maxBufferSizeStr); + AccumuloConfiguration.getFixedMemoryAsBytes(maxBufferSizeStr); } catch (Exception e) { throw new IllegalArgumentException("Failed to parse opt " + MAX_BUFFER_SIZE_OPT + " " + maxBufferSizeStr, e); }