public class TableMapReduceUtil
extends java.lang.Object
Utility for TableMap and TableReduce
Constructor and Description |
---|
TableMapReduceUtil() |
Modifier and Type | Method and Description |
---|---|
static void |
addDependencyJars(JobConf job) |
static void |
initCredentials(JobConf job) |
static void |
initMultiTableSnapshotMapperJob(java.util.Map<java.lang.String,java.util.Collection<Scan>> snapshotScans,
java.lang.Class<? extends TableMap> mapper,
java.lang.Class<?> outputKeyClass,
java.lang.Class<?> outputValueClass,
JobConf job,
boolean addDependencyJars,
Path tmpRestoreDir)
Sets up the job for reading from one or more table snapshots, with one or more scans
per snapshot.
|
static void |
initTableMapJob(java.lang.String table,
java.lang.String columns,
java.lang.Class<? extends TableMap> mapper,
java.lang.Class<?> outputKeyClass,
java.lang.Class<?> outputValueClass,
JobConf job)
Use this before submitting a TableMap job.
|
static void |
initTableMapJob(java.lang.String table,
java.lang.String columns,
java.lang.Class<? extends TableMap> mapper,
java.lang.Class<?> outputKeyClass,
java.lang.Class<?> outputValueClass,
JobConf job,
boolean addDependencyJars) |
static void |
initTableMapJob(java.lang.String table,
java.lang.String columns,
java.lang.Class<? extends TableMap> mapper,
java.lang.Class<?> outputKeyClass,
java.lang.Class<?> outputValueClass,
JobConf job,
boolean addDependencyJars,
java.lang.Class<? extends InputFormat> inputFormat)
Use this before submitting a TableMap job.
|
static void |
initTableReduceJob(java.lang.String table,
java.lang.Class<? extends TableReduce> reducer,
JobConf job)
Use this before submitting a TableReduce job.
|
static void |
initTableReduceJob(java.lang.String table,
java.lang.Class<? extends TableReduce> reducer,
JobConf job,
java.lang.Class partitioner)
Use this before submitting a TableReduce job.
|
static void |
initTableReduceJob(java.lang.String table,
java.lang.Class<? extends TableReduce> reducer,
JobConf job,
java.lang.Class partitioner,
boolean addDependencyJars)
Use this before submitting a TableReduce job.
|
static void |
initTableSnapshotMapJob(java.lang.String snapshotName,
java.lang.String columns,
java.lang.Class<? extends TableMap> mapper,
java.lang.Class<?> outputKeyClass,
java.lang.Class<?> outputValueClass,
JobConf job,
boolean addDependencyJars,
Path tmpRestoreDir)
Sets up the job for reading from a table snapshot.
|
static void |
initTableSnapshotMapJob(java.lang.String snapshotName,
java.lang.String columns,
java.lang.Class<? extends TableMap> mapper,
java.lang.Class<?> outputKeyClass,
java.lang.Class<?> outputValueClass,
JobConf jobConf,
boolean addDependencyJars,
Path tmpRestoreDir,
RegionSplitter.SplitAlgorithm splitAlgo,
int numSplitsPerRegion)
Sets up the job for reading from a table snapshot.
|
static void |
limitNumMapTasks(java.lang.String table,
JobConf job)
Ensures that the given number of map tasks for the given job
configuration does not exceed the number of regions for the given table.
|
static void |
limitNumReduceTasks(java.lang.String table,
JobConf job)
Ensures that the given number of reduce tasks for the given job
configuration does not exceed the number of regions for the given table.
|
static void |
setNumMapTasks(java.lang.String table,
JobConf job)
Sets the number of map tasks for the given job configuration to the
number of regions the given table has.
|
static void |
setNumReduceTasks(java.lang.String table,
JobConf job)
Sets the number of reduce tasks for the given job configuration to the
number of regions the given table has.
|
static void |
setScannerCaching(JobConf job,
int batchSize)
Sets the number of rows to return and cache with each scanner iteration.
|
public static void initTableMapJob(java.lang.String table, java.lang.String columns, java.lang.Class<? extends TableMap> mapper, java.lang.Class<?> outputKeyClass, java.lang.Class<?> outputValueClass, JobConf job)
table
- The table name to read from.
columns
- The columns to scan.
mapper
- The mapper class to use.
outputKeyClass
- The class of the output key.
outputValueClass
- The class of the output value.
job
- The current job configuration to adjust.
public static void initTableMapJob(java.lang.String table, java.lang.String columns, java.lang.Class<? extends TableMap> mapper, java.lang.Class<?> outputKeyClass, java.lang.Class<?> outputValueClass, JobConf job, boolean addDependencyJars)
public static void initTableMapJob(java.lang.String table, java.lang.String columns, java.lang.Class<? extends TableMap> mapper, java.lang.Class<?> outputKeyClass, java.lang.Class<?> outputValueClass, JobConf job, boolean addDependencyJars, java.lang.Class<? extends InputFormat> inputFormat)
table
- The table name to read from.
columns
- The columns to scan.
mapper
- The mapper class to use.
outputKeyClass
- The class of the output key.
outputValueClass
- The class of the output value.
job
- The current job configuration to adjust.
addDependencyJars
- upload HBase jars and jars for any of the configured
job classes via the distributed cache (tmpjars).
public static void initMultiTableSnapshotMapperJob(java.util.Map<java.lang.String,java.util.Collection<Scan>> snapshotScans, java.lang.Class<? extends TableMap> mapper, java.lang.Class<?> outputKeyClass, java.lang.Class<?> outputValueClass, JobConf job, boolean addDependencyJars, Path tmpRestoreDir) throws java.io.IOException
snapshotScans
- map of snapshot name to scans on that snapshot.
mapper
- The mapper class to use.
outputKeyClass
- The class of the output key.
outputValueClass
- The class of the output value.
job
- The current job to adjust. Make sure the passed job is
carrying all necessary HBase configuration.
addDependencyJars
- upload HBase jars and jars for any of the configured
job classes via the distributed cache (tmpjars).
java.io.IOException
public static void initTableSnapshotMapJob(java.lang.String snapshotName, java.lang.String columns, java.lang.Class<? extends TableMap> mapper, java.lang.Class<?> outputKeyClass, java.lang.Class<?> outputValueClass, JobConf job, boolean addDependencyJars, Path tmpRestoreDir) throws java.io.IOException
snapshotName
- The name of the snapshot (of a table) to read from.
columns
- The columns to scan.
mapper
- The mapper class to use.
outputKeyClass
- The class of the output key.
outputValueClass
- The class of the output value.
job
- The current job to adjust. Make sure the passed job is
carrying all necessary HBase configuration.
addDependencyJars
- upload HBase jars and jars for any of the configured
job classes via the distributed cache (tmpjars).
tmpRestoreDir
- a temporary directory to copy the snapshot files into. Current user should
have write permissions to this directory, and this should not be a subdirectory of rootdir.
After the job is finished, restore directory can be deleted.
java.io.IOException
- When setting up the details fails.
See also: TableSnapshotInputFormat
public static void initTableSnapshotMapJob(java.lang.String snapshotName, java.lang.String columns, java.lang.Class<? extends TableMap> mapper, java.lang.Class<?> outputKeyClass, java.lang.Class<?> outputValueClass, JobConf jobConf, boolean addDependencyJars, Path tmpRestoreDir, RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws java.io.IOException
snapshotName
- The name of the snapshot (of a table) to read from.
columns
- The columns to scan.
mapper
- The mapper class to use.
outputKeyClass
- The class of the output key.
outputValueClass
- The class of the output value.
jobConf
- The current job to adjust. Make sure the passed job is
carrying all necessary HBase configuration.
addDependencyJars
- upload HBase jars and jars for any of the configured
job classes via the distributed cache (tmpjars).
tmpRestoreDir
- a temporary directory to copy the snapshot files into. Current user should
have write permissions to this directory, and this should not be a subdirectory of rootdir.
After the job is finished, restore directory can be deleted.
splitAlgo
- algorithm to split
numSplitsPerRegion
- how many input splits to generate per region
java.io.IOException
- When setting up the details fails.
See also: TableSnapshotInputFormat
public static void initTableReduceJob(java.lang.String table, java.lang.Class<? extends TableReduce> reducer, JobConf job) throws java.io.IOException
table
- The output table.
reducer
- The reducer class to use.
job
- The current job configuration to adjust.
java.io.IOException
- When determining the region count fails.
public static void initTableReduceJob(java.lang.String table, java.lang.Class<? extends TableReduce> reducer, JobConf job, java.lang.Class partitioner) throws java.io.IOException
table
- The output table.
reducer
- The reducer class to use.
job
- The current job configuration to adjust.
partitioner
- Partitioner to use. Pass null to use default partitioner.
java.io.IOException
- When determining the region count fails.
public static void initTableReduceJob(java.lang.String table, java.lang.Class<? extends TableReduce> reducer, JobConf job, java.lang.Class partitioner, boolean addDependencyJars) throws java.io.IOException
table
- The output table.
reducer
- The reducer class to use.
job
- The current job configuration to adjust.
partitioner
- Partitioner to use. Pass null to use default partitioner.
addDependencyJars
- upload HBase jars and jars for any of the configured
job classes via the distributed cache (tmpjars).
java.io.IOException
- When determining the region count fails.
public static void initCredentials(JobConf job) throws java.io.IOException
java.io.IOException
public static void limitNumReduceTasks(java.lang.String table, JobConf job) throws java.io.IOException
table
- The table to get the region count for.
job
- The current job configuration to adjust.
java.io.IOException
- When retrieving the table details fails.
public static void limitNumMapTasks(java.lang.String table, JobConf job) throws java.io.IOException
table
- The table to get the region count for.
job
- The current job configuration to adjust.
java.io.IOException
- When retrieving the table details fails.
public static void setNumReduceTasks(java.lang.String table, JobConf job) throws java.io.IOException
table
- The table to get the region count for.
job
- The current job configuration to adjust.
java.io.IOException
- When retrieving the table details fails.
public static void setNumMapTasks(java.lang.String table, JobConf job) throws java.io.IOException
table
- The table to get the region count for.
job
- The current job configuration to adjust.
java.io.IOException
- When retrieving the table details fails.
public static void setScannerCaching(JobConf job, int batchSize)
job
- The current job configuration to adjust.
batchSize
- The number of rows to return in batch with each scanner
iteration.
public static void addDependencyJars(JobConf job) throws java.io.IOException
java.io.IOException
See also: org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)