|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object org.apache.hadoop.hive.ql.exec.Utilities
public final class Utilities
Utilities.
Nested Class Summary | |
---|---|
static class |
Utilities.CollectionPersistenceDelegate
|
static class |
Utilities.CommonTokenDelegate
Need to serialize org.antlr.runtime.CommonToken |
static class |
Utilities.DatePersistenceDelegate
DatePersistenceDelegate. |
static class |
Utilities.EnumDelegate
Java 1.5 workaround. |
static class |
Utilities.ListDelegate
|
static class |
Utilities.MapDelegate
|
static class |
Utilities.PathDelegate
|
static class |
Utilities.ReduceField
ReduceField: KEY: record key VALUE: record value |
static class |
Utilities.SetDelegate
|
static class |
Utilities.SQLCommand<T>
|
static class |
Utilities.StreamPrinter
StreamPrinter. |
static class |
Utilities.StreamStatus
StreamStatus. |
static class |
Utilities.TimestampPersistenceDelegate
TimestampPersistenceDelegate. |
Field Summary | |
---|---|
static int |
carriageReturnCode
|
static int |
ctrlaCode
|
static TableDesc |
defaultTd
|
static String |
HADOOP_LOCAL_FS
The objects in the reducer are composed of these top-level fields. |
static String |
INDENT
|
static String |
MAP_PLAN_NAME
|
static String |
MAPRED_MAPPER_CLASS
|
static String |
MAPRED_REDUCER_CLASS
|
static int |
newLineCode
|
static String |
NSTR
|
static String |
nullStringOutput
|
static String |
nullStringStorage
|
static Random |
randGen
|
static String |
REDUCE_PLAN_NAME
|
static List<String> |
reduceFieldNameList
|
static ThreadLocal<com.esotericsoftware.kryo.Kryo> |
runtimeSerializationKryo
|
static char |
sqlEscapeChar
|
static String |
suffix
|
static int |
tabCode
|
Method Summary | ||
---|---|---|
static String |
abbreviate(String str,
int max)
convert "From src insert blah blah" to "From src insert ... |
|
static ClassLoader |
addToClassPath(ClassLoader cloader,
String[] newPaths)
Add new elements to the classpath. |
|
static ClassLoader |
addToClassPath(ClassLoader cloader,
String[] newPaths,
boolean downloadFiles)
|
|
static void |
clearWork(org.apache.hadoop.conf.Configuration conf)
|
|
static void |
clearWorkMap()
|
|
static Set<Operator<?>> |
cloneOperatorTree(org.apache.hadoop.conf.Configuration conf,
Set<Operator<?>> roots)
|
|
static MapredWork |
clonePlan(MapredWork plan)
Clones using the powers of XML. |
|
static Connection |
connectWithRetry(String connectionString,
int waitWindow,
int maxRetries)
Retry connecting to a database with random backoff (same as the one implemented in HDFS-767). |
|
static boolean |
contentsEqual(InputStream is1,
InputStream is2,
boolean ignoreWhitespace)
|
|
static void |
copyTableJobPropertiesToConf(TableDesc tbl,
org.apache.hadoop.mapred.JobConf job)
Copies the storage handler properties configured for a table descriptor to a runtime job configuration. |
|
static OutputStream |
createCompressedStream(org.apache.hadoop.mapred.JobConf jc,
OutputStream out)
Convert an output stream to a compressed output stream based on codecs and compression options specified in the Job Configuration. |
|
static OutputStream |
createCompressedStream(org.apache.hadoop.mapred.JobConf jc,
OutputStream out,
boolean isCompressed)
Convert an output stream to a compressed output stream based on codecs in the Job Configuration. |
|
static boolean |
createDirsWithPermission(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path mkdir,
org.apache.hadoop.fs.permission.FsPermission fsPermission)
|
|
static boolean |
createDirsWithPermission(org.apache.hadoop.conf.Configuration conf,
org.apache.hadoop.fs.Path mkdir,
org.apache.hadoop.fs.permission.FsPermission fsPermission,
boolean recursive)
|
|
static RCFile.Writer |
createRCFileWriter(org.apache.hadoop.mapred.JobConf jc,
org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file,
boolean isCompressed)
Create an RCFile output stream based on job configuration. Uses user-supplied compression flag (rather than obtaining it from the Job Configuration). |
|
static org.apache.hadoop.io.SequenceFile.Writer |
createSequenceWriter(org.apache.hadoop.mapred.JobConf jc,
org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file,
Class<?> keyClass,
Class<?> valClass)
Create a sequencefile output stream based on job configuration. |
|
static org.apache.hadoop.io.SequenceFile.Writer |
createSequenceWriter(org.apache.hadoop.mapred.JobConf jc,
org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path file,
Class<?> keyClass,
Class<?> valClass,
boolean isCompressed)
Create a sequencefile output stream based on job configuration. Uses user-supplied compression flag (rather than obtaining it from the Job Configuration). |
|
static File |
createTempDir(String baseDir)
Create a temp dir in the specified baseDir. This can go away once Hive moves to support only JDK 7 and can use Files.createTempDirectory. Guava Files.createTempDir() does not take a base dir. |
|
static void |
createTmpDirs(org.apache.hadoop.conf.Configuration conf,
MapWork mWork)
Hive uses tmp directories to capture the output of each FileSinkOperator. |
|
static void |
createTmpDirs(org.apache.hadoop.conf.Configuration conf,
ReduceWork rWork)
Hive uses tmp directories to capture the output of each FileSinkOperator. |
|
static ExprNodeGenericFuncDesc |
deserializeExpression(String s)
|
|
static ExprNodeGenericFuncDesc |
deserializeExpressionFromKryo(byte[] bytes)
Deserializes expression from Kryo. |
|
static <T> T |
deserializePlan(InputStream in,
Class<T> planClass,
org.apache.hadoop.conf.Configuration conf)
Deserializes the plan. |
|
static String |
escapeSqlLike(String key)
Escape the '_', '%', as well as the escape characters inside the string key. |
|
static int |
estimateNumberOfReducers(HiveConf conf,
org.apache.hadoop.fs.ContentSummary inputSummary,
MapWork work,
boolean finalMapRed)
Estimate the number of reducers needed for this job, based on job input, and configuration parameters. |
|
static int |
estimateReducers(long totalInputFileSize,
long bytesPerReducer,
int maxReducers,
boolean powersOfTwo)
|
|
static <T> T |
executeWithRetry(Utilities.SQLCommand<T> cmd,
PreparedStatement stmt,
int baseWindow,
int maxRetries)
Retry SQL execution with random backoff (same as the one implemented in HDFS-767). |
|
static String |
formatBinaryString(byte[] array,
int start,
int length)
|
|
static String |
formatMsecToStr(long msec)
Format a number of milliseconds as a string. |
|
static String |
generateFileName(Byte tag,
String bigBucketFileName)
|
|
static String |
generatePath(org.apache.hadoop.fs.Path baseURI,
String filename)
|
|
static org.apache.hadoop.fs.Path |
generatePath(org.apache.hadoop.fs.Path basePath,
String dumpFilePrefix,
Byte tag,
String bigBucketFileName)
|
|
static String |
generateTarFileName(String name)
|
|
static org.apache.hadoop.fs.Path |
generateTarPath(org.apache.hadoop.fs.Path basePath,
String filename)
|
|
static org.apache.hadoop.fs.Path |
generateTmpPath(org.apache.hadoop.fs.Path basePath,
String id)
|
|
static List<String> |
getColumnNames(Properties props)
|
|
static List<String> |
getColumnNamesFromFieldSchema(List<org.apache.hadoop.hive.metastore.api.FieldSchema> partCols)
|
|
static List<String> |
getColumnNamesFromSortCols(List<org.apache.hadoop.hive.metastore.api.Order> sortCols)
|
|
static List<String> |
getColumnTypes(Properties props)
|
|
static String[] |
getDbTableName(String dbtable)
Extract db and table name from dbtable string, where db and table are separated by "."; if there is no db name part, set the current session's default db. |
|
static int |
getDefaultNotificationInterval(org.apache.hadoop.conf.Configuration hconf)
Gets the default notification interval to send progress updates to the tracker. |
|
static List<String> |
getFieldSchemaString(List<org.apache.hadoop.hive.metastore.api.FieldSchema> fl)
|
|
static String |
getFileExtension(org.apache.hadoop.mapred.JobConf jc,
boolean isCompressed)
Deprecated. Use getFileExtension(JobConf, boolean, HiveOutputFormat) |
|
static String |
getFileExtension(org.apache.hadoop.mapred.JobConf jc,
boolean isCompressed,
HiveOutputFormat<?,?> hiveOutputFormat)
Based on compression option, output format, and configured output codec - get extension for output file. |
|
static String |
getFileNameFromDirName(String dirName)
|
|
static int |
getFooterCount(TableDesc table,
org.apache.hadoop.mapred.JobConf job)
Get footer line count for a table. |
|
static List<LinkedHashMap<String,String>> |
getFullDPSpecs(org.apache.hadoop.conf.Configuration conf,
DynamicPartitionCtx dpCtx)
Construct a list of full partition spec from Dynamic Partition Context and the directory names corresponding to these dynamic partitions. |
|
static String |
getHashedStatsPrefix(String statsPrefix,
int maxPrefixLength)
If statsPrefix's length is greater than maxPrefixLength and maxPrefixLength > 0, then it returns an MD5 hash of statsPrefix followed by path separator, otherwise it returns statsPrefix |
|
static int |
getHeaderCount(TableDesc table)
Get header line count for a table. |
|
static double |
getHighestSamplePercentage(MapWork work)
Returns the highest sample percentage of any alias in the given MapWork |
|
static List<org.apache.hadoop.fs.Path> |
getInputPaths(org.apache.hadoop.mapred.JobConf job,
MapWork work,
org.apache.hadoop.fs.Path hiveScratchDir,
Context ctx)
Computes a list of all input paths needed to compute the given MapWork. |
|
static List<org.apache.hadoop.fs.Path> |
getInputPathsTez(org.apache.hadoop.mapred.JobConf job,
MapWork work)
On Tez we're not creating dummy files when getting/setting input paths. |
|
static org.apache.hadoop.fs.ContentSummary |
getInputSummary(Context ctx,
MapWork work,
org.apache.hadoop.fs.PathFilter filter)
Calculate the total size of input files. |
|
static List<String> |
getInternalColumnNamesFromSignature(List<ColumnInfo> colInfos)
|
|
static MapredWork |
getMapRedWork(org.apache.hadoop.conf.Configuration conf)
|
|
static MapWork |
getMapWork(org.apache.hadoop.conf.Configuration conf)
|
|
static List<ExecDriver> |
getMRTasks(List<Task<? extends Serializable>> tasks)
|
|
static String |
getNameMessage(Exception e)
|
|
static String |
getOpTreeSkel(Operator<?> op)
|
|
static PartitionDesc |
getPartitionDesc(Partition part)
|
|
static PartitionDesc |
getPartitionDescFromTableDesc(TableDesc tblDesc,
Partition part)
|
|
static org.apache.hadoop.fs.Path |
getPlanPath(org.apache.hadoop.conf.Configuration conf)
|
|
static String |
getPrefixedTaskIdFromFilename(String filename)
Get the part-spec + task id from the filename. |
|
static long |
getRandomWaitTime(int baseWindow,
int failures,
Random r)
Introducing a random factor to the wait time before another retry. |
|
static ReduceWork |
getReduceWork(org.apache.hadoop.conf.Configuration conf)
|
|
static String |
getResourceFiles(org.apache.hadoop.conf.Configuration conf,
SessionState.ResourceType t)
|
|
static StatsPublisher |
getStatsPublisher(org.apache.hadoop.mapred.JobConf jc)
|
|
static TableDesc |
getTableDesc(String cols,
String colTypes)
|
|
static TableDesc |
getTableDesc(Table tbl)
|
|
static String |
getTaskId(org.apache.hadoop.conf.Configuration hconf)
Gets the task id if we are running as a Hadoop job. |
|
static String |
getTaskIdFromFilename(String filename)
Get the task id from the filename. |
|
static List<TezTask> |
getTezTasks(List<Task<? extends Serializable>> tasks)
|
|
static long |
getTotalInputFileSize(org.apache.hadoop.fs.ContentSummary inputSummary,
MapWork work,
double highestSamplePercentage)
Computes the total input file size. |
|
static long |
getTotalInputNumFiles(org.apache.hadoop.fs.ContentSummary inputSummary,
MapWork work,
double highestSamplePercentage)
Computes the total number of input files. |
|
static boolean |
isEmptyPath(org.apache.hadoop.mapred.JobConf job,
org.apache.hadoop.fs.Path dirPath)
|
|
static boolean |
isEmptyPath(org.apache.hadoop.mapred.JobConf job,
org.apache.hadoop.fs.Path dirPath,
Context ctx)
|
|
static boolean |
isTempPath(org.apache.hadoop.fs.FileStatus file)
Detect if the supplied file is a temporary path. |
|
static boolean |
isVectorMode(org.apache.hadoop.conf.Configuration conf)
Returns true if a plan is both configured for vectorized execution and vectorization is allowed. |
|
static String |
join(String... elements)
|
|
static org.apache.hadoop.fs.FileStatus[] |
listStatusIfExists(org.apache.hadoop.fs.Path path,
org.apache.hadoop.fs.FileSystem fs)
Returns null if the path does not exist. |
|
static ArrayList |
makeList(Object... olist)
|
|
static HashMap |
makeMap(Object... olist)
|
|
static Properties |
makeProperties(String... olist)
|
|
static List<String> |
mergeUniqElems(List<String> src,
List<String> dest)
|
|
static void |
mvFileToFinalPath(org.apache.hadoop.fs.Path specPath,
org.apache.hadoop.conf.Configuration hconf,
boolean success,
org.apache.commons.logging.Log log,
DynamicPartitionCtx dpCtx,
FileSinkDesc conf,
org.apache.hadoop.mapred.Reporter reporter)
|
|
static String |
now()
|
|
static PreparedStatement |
prepareWithRetry(Connection conn,
String stmt,
int waitWindow,
int maxRetries)
Retry preparing a SQL statement with random backoff (same as the one implemented in HDFS-767). |
|
static Utilities.StreamStatus |
readColumn(DataInput in,
OutputStream out)
|
|
static String |
realFile(String newFile,
org.apache.hadoop.conf.Configuration conf)
Shamelessly cloned from GenericOptionsParser. |
|
static void |
removeFromClassPath(String[] pathsToRemove)
Remove elements from the classpath. |
|
static HashMap<String,org.apache.hadoop.fs.FileStatus> |
removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileStatus[] items,
org.apache.hadoop.fs.FileSystem fs)
|
|
static void |
removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path)
Remove all temporary files and duplicate (double-committed) files from a given directory. |
|
static ArrayList<String> |
removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path,
DynamicPartitionCtx dpCtx)
Remove all temporary files and duplicate (double-committed) files from a given directory. |
|
static void |
rename(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dst)
Rename src to dst, or in the case dst already exists, move files in src to dst. |
|
static void |
renameOrMoveFiles(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path src,
org.apache.hadoop.fs.Path dst)
Rename src to dst, or in the case dst already exists, move files in src to dst. |
|
static String |
replaceTaskIdFromFilename(String filename,
int bucketNum)
Replace the task id from the filename. |
|
static String |
replaceTaskIdFromFilename(String filename,
String fileId)
|
|
static void |
reworkMapRedWork(Task<? extends Serializable> task,
boolean reworkMapredWork,
HiveConf conf)
The check here is kind of not clean. |
|
static String |
serializeExpression(ExprNodeGenericFuncDesc expr)
|
|
static byte[] |
serializeExpressionToKryo(ExprNodeGenericFuncDesc expr)
Serializes expression via Kryo. |
|
static void |
serializePlan(Object plan,
OutputStream out,
org.apache.hadoop.conf.Configuration conf)
Serializes the plan. |
|
static void |
setBaseWork(org.apache.hadoop.conf.Configuration conf,
String name,
BaseWork work)
Pushes work into the global work map |
|
static void |
setColumnNameList(org.apache.hadoop.mapred.JobConf jobConf,
Operator op)
|
|
static void |
setColumnNameList(org.apache.hadoop.mapred.JobConf jobConf,
Operator op,
boolean excludeVCs)
|
|
static void |
setColumnTypeList(org.apache.hadoop.mapred.JobConf jobConf,
Operator op)
|
|
static void |
setColumnTypeList(org.apache.hadoop.mapred.JobConf jobConf,
Operator op,
boolean excludeVCs)
|
|
static void |
setInputAttributes(org.apache.hadoop.conf.Configuration conf,
MapWork mWork)
Set hive input format, and input format file if necessary. |
|
static void |
setInputPaths(org.apache.hadoop.mapred.JobConf job,
List<org.apache.hadoop.fs.Path> pathsToAdd)
setInputPaths adds all the paths in the provided list to the Job conf object as input paths for the job. |
|
static void |
setMapRedWork(org.apache.hadoop.conf.Configuration conf,
MapredWork w,
org.apache.hadoop.fs.Path hiveScratchDir)
|
|
static void |
setMapWork(org.apache.hadoop.conf.Configuration conf,
MapWork work)
|
|
static org.apache.hadoop.fs.Path |
setMapWork(org.apache.hadoop.conf.Configuration conf,
MapWork w,
org.apache.hadoop.fs.Path hiveScratchDir,
boolean useCache)
|
|
static void |
setReduceWork(org.apache.hadoop.conf.Configuration conf,
ReduceWork work)
|
|
static org.apache.hadoop.fs.Path |
setReduceWork(org.apache.hadoop.conf.Configuration conf,
ReduceWork w,
org.apache.hadoop.fs.Path hiveScratchDir,
boolean useCache)
|
|
static void |
setWorkflowAdjacencies(org.apache.hadoop.conf.Configuration conf,
QueryPlan plan)
|
|
static double |
showTime(long time)
|
|
static boolean |
skipHeader(org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.WritableComparable,org.apache.hadoop.io.Writable> currRecReader,
int headerCount,
org.apache.hadoop.io.WritableComparable key,
org.apache.hadoop.io.Writable value)
Skip header lines in the table file when reading the record. |
|
static long |
sumOf(Map<String,Long> aliasToSize,
Set<String> aliases)
|
|
static long |
sumOfExcept(Map<String,Long> aliasToSize,
Set<String> aliases,
Set<String> excepts)
|
|
static org.apache.hadoop.fs.Path |
toTaskTempPath(org.apache.hadoop.fs.Path orig)
|
|
static org.apache.hadoop.fs.Path |
toTempPath(org.apache.hadoop.fs.Path orig)
|
|
static org.apache.hadoop.fs.Path |
toTempPath(String orig)
Given a path, convert to a temporary path. |
|
static void |
validateColumnNames(List<String> colNames,
List<String> checkCols)
|
Methods inherited from class java.lang.Object |
---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static String HADOOP_LOCAL_FS
public static String MAP_PLAN_NAME
public static String REDUCE_PLAN_NAME
public static final String MAPRED_MAPPER_CLASS
public static final String MAPRED_REDUCER_CLASS
public static List<String> reduceFieldNameList
public static ThreadLocal<com.esotericsoftware.kryo.Kryo> runtimeSerializationKryo
public static TableDesc defaultTd
public static final int carriageReturnCode
public static final int newLineCode
public static final int tabCode
public static final int ctrlaCode
public static final String INDENT
public static String nullStringStorage
public static String nullStringOutput
public static Random randGen
public static final String NSTR
public static String suffix
public static final char sqlEscapeChar
Method Detail |
---|
public static void clearWork(org.apache.hadoop.conf.Configuration conf)
public static MapredWork getMapRedWork(org.apache.hadoop.conf.Configuration conf)
public static void setMapWork(org.apache.hadoop.conf.Configuration conf, MapWork work)
public static MapWork getMapWork(org.apache.hadoop.conf.Configuration conf)
public static void setReduceWork(org.apache.hadoop.conf.Configuration conf, ReduceWork work)
public static ReduceWork getReduceWork(org.apache.hadoop.conf.Configuration conf)
public static void setBaseWork(org.apache.hadoop.conf.Configuration conf, String name, BaseWork work)
public static void setWorkflowAdjacencies(org.apache.hadoop.conf.Configuration conf, QueryPlan plan)
public static List<String> getFieldSchemaString(List<org.apache.hadoop.hive.metastore.api.FieldSchema> fl)
public static void setMapRedWork(org.apache.hadoop.conf.Configuration conf, MapredWork w, org.apache.hadoop.fs.Path hiveScratchDir)
public static org.apache.hadoop.fs.Path setMapWork(org.apache.hadoop.conf.Configuration conf, MapWork w, org.apache.hadoop.fs.Path hiveScratchDir, boolean useCache)
public static org.apache.hadoop.fs.Path setReduceWork(org.apache.hadoop.conf.Configuration conf, ReduceWork w, org.apache.hadoop.fs.Path hiveScratchDir, boolean useCache)
public static org.apache.hadoop.fs.Path getPlanPath(org.apache.hadoop.conf.Configuration conf)
public static byte[] serializeExpressionToKryo(ExprNodeGenericFuncDesc expr)
expr
- Expression.
public static ExprNodeGenericFuncDesc deserializeExpressionFromKryo(byte[] bytes)
bytes
- Bytes containing the expression.
public static String serializeExpression(ExprNodeGenericFuncDesc expr)
public static ExprNodeGenericFuncDesc deserializeExpression(String s)
public static Set<Operator<?>> cloneOperatorTree(org.apache.hadoop.conf.Configuration conf, Set<Operator<?>> roots)
public static void serializePlan(Object plan, OutputStream out, org.apache.hadoop.conf.Configuration conf)
plan
- The plan, such as QueryPlan, MapredWork, etc.
out
- The stream to write to.
conf
- to pick which serialization format is desired.
public static <T> T deserializePlan(InputStream in, Class<T> planClass, org.apache.hadoop.conf.Configuration conf)
in
- The stream to read from.
planClass
- class of plan
conf
- configuration
public static MapredWork clonePlan(MapredWork plan)
plan
- The plan.
public static String getTaskId(org.apache.hadoop.conf.Configuration hconf)
public static HashMap makeMap(Object... olist)
public static Properties makeProperties(String... olist)
public static ArrayList makeList(Object... olist)
public static TableDesc getTableDesc(Table tbl)
public static TableDesc getTableDesc(String cols, String colTypes)
public static PartitionDesc getPartitionDesc(Partition part) throws HiveException
HiveException
public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part) throws HiveException
HiveException
public static String getOpTreeSkel(Operator<?> op)
public static boolean contentsEqual(InputStream is1, InputStream is2, boolean ignoreWhitespace) throws IOException
IOException
public static String abbreviate(String str, int max)
public static Utilities.StreamStatus readColumn(DataInput in, OutputStream out) throws IOException
IOException
public static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc, OutputStream out) throws IOException
jc
- Job Configuration
out
- Output Stream to be converted into compressed output stream
IOException
public static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc, OutputStream out, boolean isCompressed) throws IOException
jc
- Job Configuration
out
- Output Stream to be converted into compressed output stream
isCompressed
- whether the output stream needs to be compressed or not
IOException
@Deprecated public static String getFileExtension(org.apache.hadoop.mapred.JobConf jc, boolean isCompressed)
Deprecated. Use getFileExtension(JobConf, boolean, HiveOutputFormat)
jc
- Job Configuration
isCompressed
- Whether the output file is compressed or not
public static String getFileExtension(org.apache.hadoop.mapred.JobConf jc, boolean isCompressed, HiveOutputFormat<?,?> hiveOutputFormat)
The property hive.output.file.extension
is used to determine
the extension - if set, it will override other logic for choosing an
extension.
jc
- Job Configuration
isCompressed
- Whether the output file is compressed or not
hiveOutputFormat
- The output format, used to detect if the format is text
public static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, Class<?> keyClass, Class<?> valClass) throws IOException
jc
- Job configuration
fs
- File System to create file in
file
- Path to be created
keyClass
- Java Class for key
valClass
- Java Class for value
IOException
public static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, Class<?> keyClass, Class<?> valClass, boolean isCompressed) throws IOException
jc
- Job configuration
fs
- File System to create file in
file
- Path to be created
keyClass
- Java Class for key
valClass
- Java Class for value
IOException
public static RCFile.Writer createRCFileWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, boolean isCompressed) throws IOException
jc
- Job configuration
fs
- File System to create file in
file
- Path to be created
IOException
public static String realFile(String newFile, org.apache.hadoop.conf.Configuration conf) throws IOException
IOException
public static List<String> mergeUniqElems(List<String> src, List<String> dest)
public static org.apache.hadoop.fs.Path toTaskTempPath(org.apache.hadoop.fs.Path orig)
public static org.apache.hadoop.fs.Path toTempPath(org.apache.hadoop.fs.Path orig)
public static org.apache.hadoop.fs.Path toTempPath(String orig)
public static boolean isTempPath(org.apache.hadoop.fs.FileStatus file)
public static void rename(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dst) throws IOException, HiveException
fs
- the FileSystem where src and dst are on.
src
- the src directory
dst
- the target directory
IOException
HiveException
public static void renameOrMoveFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dst) throws IOException, HiveException
fs
- the FileSystem where src and dst are on.
src
- the src directory
dst
- the target directory
IOException
HiveException
public static String getTaskIdFromFilename(String filename)
filename
- filename to extract taskid from
public static String getPrefixedTaskIdFromFilename(String filename)
filename
- filename to extract taskid from
public static String getFileNameFromDirName(String dirName)
public static String replaceTaskIdFromFilename(String filename, int bucketNum)
filename
- filename to replace taskid "0_0" or "0_0.gz" by 33 to "33_0" or "33_0.gz"
public static String replaceTaskIdFromFilename(String filename, String fileId)
public static org.apache.hadoop.fs.FileStatus[] listStatusIfExists(org.apache.hadoop.fs.Path path, org.apache.hadoop.fs.FileSystem fs) throws IOException
IOException
public static void mvFileToFinalPath(org.apache.hadoop.fs.Path specPath, org.apache.hadoop.conf.Configuration hconf, boolean success, org.apache.commons.logging.Log log, DynamicPartitionCtx dpCtx, FileSinkDesc conf, org.apache.hadoop.mapred.Reporter reporter) throws IOException, HiveException
IOException
HiveException
public static void removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path) throws IOException
IOException
public static ArrayList<String> removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path, DynamicPartitionCtx dpCtx) throws IOException
IOException
public static HashMap<String,org.apache.hadoop.fs.FileStatus> removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileStatus[] items, org.apache.hadoop.fs.FileSystem fs) throws IOException
IOException
public static String getNameMessage(Exception e)
public static String getResourceFiles(org.apache.hadoop.conf.Configuration conf, SessionState.ResourceType t)
public static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths) throws Exception
newPaths
- Array of classpath elements
Exception
public static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths, boolean downloadFiles) throws Exception
Exception
public static void removeFromClassPath(String[] pathsToRemove) throws Exception
pathsToRemove
- Array of classpath elements
Exception
public static String formatBinaryString(byte[] array, int start, int length)
public static List<String> getColumnNamesFromSortCols(List<org.apache.hadoop.hive.metastore.api.Order> sortCols)
public static List<String> getColumnNamesFromFieldSchema(List<org.apache.hadoop.hive.metastore.api.FieldSchema> partCols)
public static List<String> getInternalColumnNamesFromSignature(List<ColumnInfo> colInfos)
public static List<String> getColumnNames(Properties props)
public static List<String> getColumnTypes(Properties props)
public static String[] getDbTableName(String dbtable) throws HiveException
dbtable
-
HiveException
public static void validateColumnNames(List<String> colNames, List<String> checkCols) throws SemanticException
SemanticException
public static int getDefaultNotificationInterval(org.apache.hadoop.conf.Configuration hconf)
hconf
-
public static void copyTableJobPropertiesToConf(TableDesc tbl, org.apache.hadoop.mapred.JobConf job)
tbl
- table descriptor from which to read
job
- configuration which receives configured properties
public static org.apache.hadoop.fs.ContentSummary getInputSummary(Context ctx, MapWork work, org.apache.hadoop.fs.PathFilter filter) throws IOException
ctx
- the hadoop job context
work
- map reduce job plan
filter
- filter to apply to the input paths before calculating size
IOException
public static long sumOf(Map<String,Long> aliasToSize, Set<String> aliases)
public static long sumOfExcept(Map<String,Long> aliasToSize, Set<String> aliases, Set<String> excepts)
public static boolean isEmptyPath(org.apache.hadoop.mapred.JobConf job, org.apache.hadoop.fs.Path dirPath, Context ctx) throws Exception
Exception
public static boolean isEmptyPath(org.apache.hadoop.mapred.JobConf job, org.apache.hadoop.fs.Path dirPath) throws Exception
Exception
public static List<TezTask> getTezTasks(List<Task<? extends Serializable>> tasks)
public static List<ExecDriver> getMRTasks(List<Task<? extends Serializable>> tasks)
public static List<LinkedHashMap<String,String>> getFullDPSpecs(org.apache.hadoop.conf.Configuration conf, DynamicPartitionCtx dpCtx) throws HiveException
HiveException
public static StatsPublisher getStatsPublisher(org.apache.hadoop.mapred.JobConf jc)
public static String getHashedStatsPrefix(String statsPrefix, int maxPrefixLength)
statsPrefix
- prefix of stats key
maxPrefixLength
- max length of stats key
public static String join(String... elements)
public static void setColumnNameList(org.apache.hadoop.mapred.JobConf jobConf, Operator op)
public static void setColumnNameList(org.apache.hadoop.mapred.JobConf jobConf, Operator op, boolean excludeVCs)
public static void setColumnTypeList(org.apache.hadoop.mapred.JobConf jobConf, Operator op)
public static void setColumnTypeList(org.apache.hadoop.mapred.JobConf jobConf, Operator op, boolean excludeVCs)
public static org.apache.hadoop.fs.Path generatePath(org.apache.hadoop.fs.Path basePath, String dumpFilePrefix, Byte tag, String bigBucketFileName)
public static String generateFileName(Byte tag, String bigBucketFileName)
public static org.apache.hadoop.fs.Path generateTmpPath(org.apache.hadoop.fs.Path basePath, String id)
public static org.apache.hadoop.fs.Path generateTarPath(org.apache.hadoop.fs.Path basePath, String filename)
public static String generateTarFileName(String name)
public static String generatePath(org.apache.hadoop.fs.Path baseURI, String filename)
public static String now()
public static double showTime(long time)
public static void reworkMapRedWork(Task<? extends Serializable> task, boolean reworkMapredWork, HiveConf conf) throws SemanticException
task
-
reworkMapredWork
-
conf
-
SemanticException
public static <T> T executeWithRetry(Utilities.SQLCommand<T> cmd, PreparedStatement stmt, int baseWindow, int maxRetries) throws SQLException
cmd
- the SQL command
stmt
- the prepared SQL statement.
baseWindow
- The base time window (in milliseconds) before the next retry.
see getRandomWaitTime(int, int, java.util.Random)
for details.
maxRetries
- the maximum number of retries when getting a SQLTransientException.
SQLException
- throws SQLRecoverableException or SQLNonTransientException the
first time it is caught, or SQLTransientException when maxRetries has been reached.
public static Connection connectWithRetry(String connectionString, int waitWindow, int maxRetries) throws SQLException
connectionString
- the JDBC connection string.
waitWindow
- The base time window (in milliseconds) before the next retry.
see getRandomWaitTime(int, int, java.util.Random)
for details.
maxRetries
- the maximum number of retries when getting a SQLTransientException.
SQLException
- throws SQLRecoverableException or SQLNonTransientException the
first time it is caught, or SQLTransientException when maxRetries has been reached.
public static PreparedStatement prepareWithRetry(Connection conn, String stmt, int waitWindow, int maxRetries) throws SQLException
conn
- a JDBC connection.
stmt
- the SQL statement to be prepared.
waitWindow
- The base time window (in milliseconds) before the next retry.
see getRandomWaitTime(int, int, java.util.Random)
for details.
maxRetries
- the maximum number of retries when getting a SQLTransientException.
SQLException
- throws SQLRecoverableException or SQLNonTransientException the
first time it is caught, or SQLTransientException when maxRetries has been reached.
public static long getRandomWaitTime(int baseWindow, int failures, Random r)
baseWindow
- the base waiting window.
failures
- number of failures so far.
r
- a random generator.
public static String escapeSqlLike(String key)
key
- the string that will be used for the SQL LIKE operator.
public static String formatMsecToStr(long msec)
msec
- milliseconds
public static int estimateNumberOfReducers(HiveConf conf, org.apache.hadoop.fs.ContentSummary inputSummary, MapWork work, boolean finalMapRed) throws IOException
IOException
public static int estimateReducers(long totalInputFileSize, long bytesPerReducer, int maxReducers, boolean powersOfTwo)
public static long getTotalInputFileSize(org.apache.hadoop.fs.ContentSummary inputSummary, MapWork work, double highestSamplePercentage)
inputSummary
-
work
-
highestSamplePercentage
-
public static long getTotalInputNumFiles(org.apache.hadoop.fs.ContentSummary inputSummary, MapWork work, double highestSamplePercentage)
inputSummary
-
work
-
highestSamplePercentage
-
public static double getHighestSamplePercentage(MapWork work)
public static List<org.apache.hadoop.fs.Path> getInputPathsTez(org.apache.hadoop.mapred.JobConf job, MapWork work) throws Exception
Exception
public static List<org.apache.hadoop.fs.Path> getInputPaths(org.apache.hadoop.mapred.JobConf job, MapWork work, org.apache.hadoop.fs.Path hiveScratchDir, Context ctx) throws Exception
job
- JobConf used to run the job
work
- MapWork encapsulating the info about the task
hiveScratchDir
- The tmp dir used to create dummy files if needed
ctx
- Context object
Exception
public static void setInputPaths(org.apache.hadoop.mapred.JobConf job, List<org.apache.hadoop.fs.Path> pathsToAdd)
job
-
pathsToAdd
-
public static void setInputAttributes(org.apache.hadoop.conf.Configuration conf, MapWork mWork)
public static void createTmpDirs(org.apache.hadoop.conf.Configuration conf, MapWork mWork) throws IOException
conf
- Used to get the right FileSystem
mWork
- Used to find FileSinkOperators
IOException
public static void createTmpDirs(org.apache.hadoop.conf.Configuration conf, ReduceWork rWork) throws IOException
conf
- Used to get the right FileSystem
rWork
- Used to find FileSinkOperators
IOException
public static boolean isVectorMode(org.apache.hadoop.conf.Configuration conf)
public static void clearWorkMap()
public static File createTempDir(String baseDir)
baseDir
- directory under which the new temp dir will be created
public static boolean skipHeader(org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.WritableComparable,org.apache.hadoop.io.Writable> currRecReader, int headerCount, org.apache.hadoop.io.WritableComparable key, org.apache.hadoop.io.Writable value) throws IOException
currRecReader
- Record reader.
headerCount
- Header line number of the table files.
key
- Key of current reading record.
value
- Value of current reading record.
IOException
public static int getHeaderCount(TableDesc table) throws IOException
table
- Table description for target table.
IOException
public static int getFooterCount(TableDesc table, org.apache.hadoop.mapred.JobConf job) throws IOException
table
- Table description for target table.
job
- Job configuration for current job.
IOException
public static boolean createDirsWithPermission(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path mkdir, org.apache.hadoop.fs.permission.FsPermission fsPermission) throws IOException
conf
- the configuration used to derive the filesystem to create the path
mkdir
- the path to be created
fsPermission
- ignored if it is a Hive server session and doAs is enabled
IOException
- if HDFS experiences any error conditions
public static boolean createDirsWithPermission(org.apache.hadoop.conf.Configuration conf, org.apache.hadoop.fs.Path mkdir, org.apache.hadoop.fs.permission.FsPermission fsPermission, boolean recursive) throws IOException
IOException
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |