YARN Resource Management
Also available as:
PDF
loading table of contents...

Configuring Accumulo on YARN

Accessing the Accumulo Configuration Files

The Accumulo application package includes default application and resource specification files. The package includes both non-secure (appConfig-default.json) and secure (appConfig-secured-default.json) versions of the application specification. You can save these files under another name, and then edit the copies to customize the Accumulo configuration.

You can use the unzip command to extract the Accumulo application and resource specification files from the Accumulo-on-Slider application package. For example, you would use the following command to extract the files from the Accumulo application package in the /usr/work/app-packages/accumulo directory:

unzip /usr/work/app-packages/accumulo/slider-app-packages/accumulo/slider-accumulo-app-package-1.7.0.2.4.2.0-258.zip appConfig-default.json -d /usr/work/app-packages/accumulo
unzip /usr/work/app-packages/accumulo/slider-app-packages/accumulo/slider-accumulo-app-package-1.7.0.2.4.2.0-258.zip resources-default.json -d /usr/work/app-packages/accumulo

You can use the following commands to copy and rename the default Accumulo application and resource specification files in the /usr/work/app-packages/accumulo directory:

cp /usr/work/app-packages/accumulo/appConfig-default.json /usr/work/app-packages/accumulo/appConfig.json
cp /usr/work/app-packages/accumulo/resources-default.json /usr/work/app-packages/accumulo/resources.json

Application Configuration for Accumulo on YARN

The following is an example of an appConfig.json file for Accumulo on YARN via Slider. The basic properties to adjust for your system are the heap size, the Accumulo memory properties, and the location of JAVA_HOME. The directories and classpaths are configured properly for HDP in the default appConfig-default.json file, but you must set the JAVA_HOME value in the "global" section of the appConfig.json file to match your system JAVA_HOME setting.

{ 
    "schema": "http://example.org/specification/v2.0.0", 
    "metadata": {
    }, 
    "global": { "application.def": ".slider/package/ACCUMULO/slider-accumulo-app-package-1.7.0.2.4.2.0-258.zip", 
    "java_home": "/usr/hadoop-jdk1.6.0_31", 
    "site.global.app_root": "${AGENT_WORK_ROOT}/app/install/accumulo-1.7.0.2.4.2.0-258", 
    "site.global.app_user": "${USER}", 
    "site.global.user_group": "hadoop", 
    
    "site.accumulo-env.java_home": "${JAVA_HOME}", 
    "site.accumulo-env.tserver_heapsize": "256m", 
    "site.accumulo-env.master_heapsize": "128m", 
    "site.accumulo-env.monitor_heapsize": "64m", 
    "site.accumulo-env.gc_heapsize": "64m", 
    "site.accumulo-env.other_heapsize": "128m", 
    "site.accumulo-env.hadoop_prefix": "/usr/hdp/current/hadoop-client", 
    "site.accumulo-env.hadoop_conf_dir": "/etc/hadoop/conf", 
    "site.accumulo-env.zookeeper_home": "${zk.dir}", 
    
    "site.client.instance.name": "${USER}-${CLUSTER_NAME}", 
    
    "site.global.accumulo_root_password": "NOT_USED", 
    "site.global.ssl_cert_dir": "ssl", 
    "site.global.monitor_protocol": "http", 
    
    "site.accumulo-site.instance.volumes": "${DEFAULT_DATA_DIR}/data", 
    "site.accumulo-site.instance.zookeeper.host": "${ZK_HOST}", 
    "site.accumulo-site.instance.security.authenticator": "org.apache.slider.accumulo.CustomAuthenticator", 
    
    "site.accumulo-site.general.security.credential.provider.paths": "jceks://hdfs/user/${USER}/accumulo-${CLUSTER_NAME}.jceks", 
    "site.accumulo-site.instance.rpc.ssl.enabled": "false", 
    "site.accumulo-site.instance.rpc.ssl.clientAuth": "false", 
    "site.accumulo-site.general.kerberos.keytab": "", 
    "site.accumulo-site.general.kerberos.principal": "", 
    
    "site.accumulo-site.tserver.memory.maps.native.enabled": "false", 
    "site.accumulo-site.tserver.memory.maps.max": "80M", 
    "site.accumulo-site.tserver.cache.data.size": "7M", 
    "site.accumulo-site.tserver.cache.index.size": "20M", 
    "site.accumulo-site.tserver.sort.buffer.size": "50M", 
    "site.accumulo-site.tserver.walog.max.size": "40M", 
    
    "site.accumulo-site.trace.user": "root", 
    
    "site.accumulo-site.master.port.client": "0", 
    "site.accumulo-site.trace.port.client": "0", 
    "site.accumulo-site.tserver.port.client": "0", 
    "site.accumulo-site.gc.port.client": "0", 
    "site.accumulo-site.monitor.port.client": "${ACCUMULO_MONITOR.ALLOCATED_PORT}", 
    "site.accumulo-site.monitor.port.log4j": "0", 
    "site.accumulo-site.master.replication.coordinator.port": "0", 
    "site.accumulo-site.replication.receipt.service.port": "0", 
    
    "site.accumulo-site.general.classpaths": "$ACCUMULO_HOME/lib/accumulo-server.jar,\n$ACCUMULO_HOME/lib/accumulo-core.jar,\n$ACCUMULO_HOME/lib/accumulo-start.jar,\n$ACCUMULO_HOME/lib/accumulo-fate.jar,\n$ACCUMULO_HOME/lib/accumulo-proxy.jar,\n$ACCUMULO_HOME/lib/[^.].*.jar,\n$ZOOKEEPER_HOME/zookeeper[^.].*.jar,\n$HADOOP_CONF_DIR,\n$HADOOP_PREFIX/[^.].*.jar,\n$HADOOP_PREFIX/lib/[^.].*.jar,\n$HADOOP_PREFIX/share/hadoop/common/.*.jar,\n$HADOOP_PREFIX/share/hadoop/common/lib/.*.jar,\n$HADOOP_PREFIX/share/hadoop/hdfs/.*.jar,\n$HADOOP_PREFIX/share/hadoop/mapreduce/.*.jar,\n$HADOOP_PREFIX/share/hadoop/yarn/.*.jar,\n/usr/hdp/current/hadoop-client/.*.jar,\n/usr/hdp/current/hadoop-client/lib/.*.jar,\n/usr/hdp/current/hadoop-hdfs-client/.*.jar,\n/usr/hdp/current/hadoop-mapreduce-client/.*.jar,\n/usr/hdp/current/hadoop-yarn-client/.*.jar,"
 }, 
 "credentials": { 
    "jceks://hdfs/user/${USER}/accumulo-${CLUSTER_NAME}.jceks": ["root.initial.password", "instance.secret", "trace.token.property.password"]
 }, 
 "components": { 
    "slider-appmaster": { 
       "jvm.heapsize": "256M", 
       "slider.am.keytab.local.path": "", 
       "slider.keytab.principal.name": ""
    }
  }
}

Resource Components in Accumulo on YARN

You can specify the following components (also referred to as "roles") when deploying Accumulo on YARN via Slider:

  • ACCUMULO_MASTER ⎯ Accumulo master process.

  • ACCUMULO_TSERVER ⎯ Accumulo tablet server process.

  • ACCUMULO_MONITOR ⎯ Accumulo monitor web UI.

  • ACCUMULO_GC ⎯ Accumulo garbage collector process.

  • ACCUMULO_TRACER ⎯ Accumulo trace collector process.

The following is an example of an Accumulo resources.json file with these roles configured:

{ 
   "schema": "http://example.org/specification/v2.0.0", 
   "metadata": {
    }, 
    "global": { 
       "yarn.log.include.patterns": "", 
       "yarn.log.exclude.patterns": ""
    }, 
    "components": { 
       "ACCUMULO_MASTER": { 
          "yarn.role.priority": "1", 
          "yarn.component.instances": "1", 
          "yarn.memory": "256"
       }, 
       "slider-appmaster": {
       }, 
       "ACCUMULO_TSERVER": { 
          "yarn.role.priority": "2", 
          "yarn.component.instances": "1", 
          "yarn.memory": "512"
       }, 
       "ACCUMULO_MONITOR": { 
          "yarn.role.priority": "3", 
          "yarn.component.instances": "1", 
          "yarn.memory": "128"
       }, 
       "ACCUMULO_GC": { 
          "yarn.role.priority": "4", 
          "yarn.component.instances": "1", 
          "yarn.memory": "128"
       }, 
       "ACCUMULO_TRACER": { 
          "yarn.role.priority": "5", 
          "yarn.component.instances": "1", 
          "yarn.memory": "256"
      }
   }
}

The memory and number of instances of each component should be adjusted for your system and desired application instance size. You typically only need to request one instance of the ACCUMULO_MONITOR, ACCUMULO_GC, and ACCUMULO_TRACER processes. For HA (High Availability) purposes, you will generally want two instances of ACCUMULO_MASTER, and enough instances of ACCUMULO_TSERVER to support your application.