org.apache.hadoop.hive.ql.plan
Class MapWork
java.lang.Object
org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc
org.apache.hadoop.hive.ql.plan.BaseWork
org.apache.hadoop.hive.ql.plan.MapWork
- All Implemented Interfaces:
- Serializable, Cloneable, OperatorDesc
- Direct Known Subclasses:
- ColumnTruncateWork, MergeWork, PartialScanWork
public class MapWork
- extends BaseWork
MapWork represents all the information used to run a map task on the cluster.
It is first used when the query planner breaks the logical plan into tasks and
used throughout physical optimization to track map-side operator plans, input
paths, aliases, etc.
ExecDriver will serialize the contents of this class and make sure it is
distributed on the cluster. The ExecMapper will ultimately deserialize this
class on the data nodes and set up its operator pipeline accordingly.
This class is also used in the explain command; any property with the
appropriate annotation will be displayed in the explain output.
- See Also:
- Serialized Form
Method Summary |
void |
addIndexIntermediateFile(String fileName)
|
void |
addMapWork(String path,
String alias,
Operator<?> work,
PartitionDesc pd)
|
void |
configureJobConf(org.apache.hadoop.mapred.JobConf job)
|
void |
deriveExplainAttributes()
Derive additional attributes to be rendered by EXPLAIN. |
ArrayList<String> |
getAliases()
|
LinkedHashMap<String,PartitionDesc> |
getAliasToPartnInfo()
|
LinkedHashMap<String,Operator<? extends OperatorDesc>> |
getAliasToWork()
|
Set<Operator<?>> |
getAllRootOperators()
|
Map<String,List<BucketingSortingCtx.BucketCol>> |
getBucketedColsByDirectory()
|
boolean |
getHadoopSupportsSplittable()
|
String |
getIndexIntermediateFile()
|
String |
getInputformat()
|
QBJoinTree |
getJoinTree()
|
MapredLocalWork |
getMapLocalWork()
|
Long |
getMaxSplitSize()
|
Long |
getMinSplitSize()
|
Long |
getMinSplitSizePerNode()
|
Long |
getMinSplitSizePerRack()
|
HashMap<String,SplitSample> |
getNameToSplitSample()
|
Integer |
getNumMapTasks()
|
LinkedHashMap<Operator<? extends OperatorDesc>,OpParseContext> |
getOpParseCtxMap()
|
ArrayList<PartitionDesc> |
getPartitionDescs()
|
ArrayList<String> |
getPaths()
|
LinkedHashMap<String,ArrayList<String>> |
getPathToAliases()
|
LinkedHashMap<String,PartitionDesc> |
getPathToPartitionInfo()
|
int |
getSamplingType()
|
String |
getSamplingTypeString()
|
Map<String,Map<String,Integer>> |
getScratchColumnMap()
|
Map<String,Map<Integer,String>> |
getScratchColumnVectorTypes()
|
Map<String,List<BucketingSortingCtx.SortCol>> |
getSortedColsByDirectory()
|
org.apache.hadoop.fs.Path |
getTmpHDFSPath()
|
Map<String,ArrayList<String>> |
getTruncatedPathToAliases()
This is used to display and verify output of "Path -> Alias" in the test framework. |
boolean |
getVectorMode()
|
String |
getVectorModeOn()
|
ArrayList<Operator<?>> |
getWorks()
|
void |
initialize()
|
boolean |
isInputFormatSorted()
|
boolean |
isMapperCannotSpanPartns()
|
boolean |
isUseBucketizedHiveInputFormat()
|
void |
mergeAliasedInput(String alias,
String pathDir,
PartitionDesc partitionInfo)
|
void |
mergingInto(MapWork mapWork)
|
void |
replaceRoots(Map<Operator<?>,Operator<?>> replacementMap)
|
void |
resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf,
org.apache.hadoop.fs.Path path,
TableDesc tblDesc,
ArrayList<String> aliases,
PartitionDesc partDesc)
|
void |
setAliasToPartnInfo(LinkedHashMap<String,PartitionDesc> aliasToPartnInfo)
|
void |
setAliasToWork(LinkedHashMap<String,Operator<? extends OperatorDesc>> aliasToWork)
|
void |
setHadoopSupportsSplittable(boolean hadoopSupportsSplittable)
|
void |
setInputformat(String inputformat)
|
void |
setInputFormatSorted(boolean inputFormatSorted)
|
void |
setJoinTree(QBJoinTree joinTree)
|
void |
setMapLocalWork(MapredLocalWork mapLocalWork)
|
void |
setMapperCannotSpanPartns(boolean mapperCannotSpanPartns)
|
void |
setMaxSplitSize(Long maxSplitSize)
|
void |
setMinSplitSize(Long minSplitSize)
|
void |
setMinSplitSizePerNode(Long minSplitSizePerNode)
|
void |
setMinSplitSizePerRack(Long minSplitSizePerRack)
|
void |
setNameToSplitSample(HashMap<String,SplitSample> nameToSplitSample)
|
void |
setNumMapTasks(Integer numMapTasks)
|
void |
setOpParseCtxMap(LinkedHashMap<Operator<? extends OperatorDesc>,OpParseContext> opParseCtxMap)
|
void |
setPathToAliases(LinkedHashMap<String,ArrayList<String>> pathToAliases)
|
void |
setPathToPartitionInfo(LinkedHashMap<String,PartitionDesc> pathToPartitionInfo)
|
void |
setSamplingType(int samplingType)
|
void |
setScratchColumnMap(Map<String,Map<String,Integer>> scratchColumnMap)
|
void |
setScratchColumnVectorTypes(Map<String,Map<Integer,String>> scratchColumnVectorTypes)
|
void |
setTmpHDFSPath(org.apache.hadoop.fs.Path tmpHDFSPath)
|
void |
setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat)
|
void |
setVectorMode(boolean vectorMode)
|
SAMPLING_ON_PREV_MR
public static final int SAMPLING_ON_PREV_MR
- See Also:
- Constant Field Values
SAMPLING_ON_START
public static final int SAMPLING_ON_START
- See Also:
- Constant Field Values
MapWork
public MapWork()
MapWork
public MapWork(String name)
getPathToAliases
public LinkedHashMap<String,ArrayList<String>> getPathToAliases()
setPathToAliases
public void setPathToAliases(LinkedHashMap<String,ArrayList<String>> pathToAliases)
getTruncatedPathToAliases
public Map<String,ArrayList<String>> getTruncatedPathToAliases()
- This is used to display and verify output of "Path -> Alias" in the test framework.
QTestUtil masks "Path -> Alias" and makes verification impossible.
This method keeps "Path -> Alias" intact and adds a new display name which,
with the prefix removed, is not masked by QTestUtil.
Note: intermediate directories are still masked.
- Returns:
getPathToPartitionInfo
public LinkedHashMap<String,PartitionDesc> getPathToPartitionInfo()
setPathToPartitionInfo
public void setPathToPartitionInfo(LinkedHashMap<String,PartitionDesc> pathToPartitionInfo)
deriveExplainAttributes
public void deriveExplainAttributes()
- Derive additional attributes to be rendered by EXPLAIN.
TODO: this method is relied upon by custom input formats to set jobconf properties.
This is madness? - This is Hive Storage Handlers!
getAliasToPartnInfo
public LinkedHashMap<String,PartitionDesc> getAliasToPartnInfo()
- Returns:
- the aliasToPartnInfo
setAliasToPartnInfo
public void setAliasToPartnInfo(LinkedHashMap<String,PartitionDesc> aliasToPartnInfo)
- Parameters:
aliasToPartnInfo
- the aliasToPartnInfo to set
getAliasToWork
public LinkedHashMap<String,Operator<? extends OperatorDesc>> getAliasToWork()
setAliasToWork
public void setAliasToWork(LinkedHashMap<String,Operator<? extends OperatorDesc>> aliasToWork)
getMapLocalWork
public MapredLocalWork getMapLocalWork()
- Returns:
- the mapredLocalWork
setMapLocalWork
public void setMapLocalWork(MapredLocalWork mapLocalWork)
- Parameters:
mapLocalWork
- the mapredLocalWork to set
getNameToSplitSample
public HashMap<String,SplitSample> getNameToSplitSample()
setNameToSplitSample
public void setNameToSplitSample(HashMap<String,SplitSample> nameToSplitSample)
getNumMapTasks
public Integer getNumMapTasks()
setNumMapTasks
public void setNumMapTasks(Integer numMapTasks)
addMapWork
public void addMapWork(String path,
String alias,
Operator<?> work,
PartitionDesc pd)
isInputFormatSorted
public boolean isInputFormatSorted()
setInputFormatSorted
public void setInputFormatSorted(boolean inputFormatSorted)
resolveDynamicPartitionStoredAsSubDirsMerge
public void resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf,
org.apache.hadoop.fs.Path path,
TableDesc tblDesc,
ArrayList<String> aliases,
PartitionDesc partDesc)
getVectorModeOn
public String getVectorModeOn()
replaceRoots
public void replaceRoots(Map<Operator<?>,Operator<?>> replacementMap)
- Specified by:
replaceRoots
in class BaseWork
getAllRootOperators
public Set<Operator<?>> getAllRootOperators()
- Specified by:
getAllRootOperators
in class BaseWork
mergeAliasedInput
public void mergeAliasedInput(String alias,
String pathDir,
PartitionDesc partitionInfo)
initialize
public void initialize()
getMaxSplitSize
public Long getMaxSplitSize()
setMaxSplitSize
public void setMaxSplitSize(Long maxSplitSize)
getMinSplitSize
public Long getMinSplitSize()
setMinSplitSize
public void setMinSplitSize(Long minSplitSize)
getMinSplitSizePerNode
public Long getMinSplitSizePerNode()
setMinSplitSizePerNode
public void setMinSplitSizePerNode(Long minSplitSizePerNode)
getMinSplitSizePerRack
public Long getMinSplitSizePerRack()
setMinSplitSizePerRack
public void setMinSplitSizePerRack(Long minSplitSizePerRack)
getInputformat
public String getInputformat()
setInputformat
public void setInputformat(String inputformat)
isUseBucketizedHiveInputFormat
public boolean isUseBucketizedHiveInputFormat()
setUseBucketizedHiveInputFormat
public void setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat)
getJoinTree
public QBJoinTree getJoinTree()
setJoinTree
public void setJoinTree(QBJoinTree joinTree)
setMapperCannotSpanPartns
public void setMapperCannotSpanPartns(boolean mapperCannotSpanPartns)
isMapperCannotSpanPartns
public boolean isMapperCannotSpanPartns()
getHadoopSupportsSplittable
public boolean getHadoopSupportsSplittable()
setHadoopSupportsSplittable
public void setHadoopSupportsSplittable(boolean hadoopSupportsSplittable)
getIndexIntermediateFile
public String getIndexIntermediateFile()
getAliases
public ArrayList<String> getAliases()
getWorks
public ArrayList<Operator<?>> getWorks()
getPaths
public ArrayList<String> getPaths()
getPartitionDescs
public ArrayList<PartitionDesc> getPartitionDescs()
getOpParseCtxMap
public LinkedHashMap<Operator<? extends OperatorDesc>,OpParseContext> getOpParseCtxMap()
setOpParseCtxMap
public void setOpParseCtxMap(LinkedHashMap<Operator<? extends OperatorDesc>,OpParseContext> opParseCtxMap)
getTmpHDFSPath
public org.apache.hadoop.fs.Path getTmpHDFSPath()
setTmpHDFSPath
public void setTmpHDFSPath(org.apache.hadoop.fs.Path tmpHDFSPath)
mergingInto
public void mergingInto(MapWork mapWork)
getBucketedColsByDirectory
public Map<String,List<BucketingSortingCtx.BucketCol>> getBucketedColsByDirectory()
getSortedColsByDirectory
public Map<String,List<BucketingSortingCtx.SortCol>> getSortedColsByDirectory()
addIndexIntermediateFile
public void addIndexIntermediateFile(String fileName)
getSamplingType
public int getSamplingType()
setSamplingType
public void setSamplingType(int samplingType)
getSamplingTypeString
public String getSamplingTypeString()
configureJobConf
public void configureJobConf(org.apache.hadoop.mapred.JobConf job)
- Specified by:
configureJobConf
in class BaseWork
getScratchColumnVectorTypes
public Map<String,Map<Integer,String>> getScratchColumnVectorTypes()
setScratchColumnVectorTypes
public void setScratchColumnVectorTypes(Map<String,Map<Integer,String>> scratchColumnVectorTypes)
getScratchColumnMap
public Map<String,Map<String,Integer>> getScratchColumnMap()
setScratchColumnMap
public void setScratchColumnMap(Map<String,Map<String,Integer>> scratchColumnMap)
getVectorMode
public boolean getVectorMode()
setVectorMode
public void setVectorMode(boolean vectorMode)
- Overrides:
setVectorMode
in class AbstractOperatorDesc
Copyright © 2014 The Apache Software Foundation. All rights reserved.