org.apache.hadoop.hive.ql.plan
Class MapWork

java.lang.Object
  extended by org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc
      extended by org.apache.hadoop.hive.ql.plan.BaseWork
          extended by org.apache.hadoop.hive.ql.plan.MapWork
All Implemented Interfaces:
Serializable, Cloneable, OperatorDesc
Direct Known Subclasses:
ColumnTruncateWork, MergeWork, PartialScanWork

public class MapWork
extends BaseWork

MapWork represents all the information used to run a map task on the cluster. It is first used when the query planner breaks the logical plan into tasks, and it is used throughout physical optimization to track map-side operator plans, input paths, aliases, etc. ExecDriver will serialize the contents of this class and make sure it is distributed on the cluster. The ExecMapper will ultimately deserialize this class on the data nodes and set up its operator pipeline accordingly. This class is also used in the explain command: any property with the appropriate annotation will be displayed in the explain output.

See Also:
Serialized Form

Field Summary
static int SAMPLING_ON_PREV_MR
           
static int SAMPLING_ON_START
           
 
Constructor Summary
MapWork()
           
MapWork(String name)
           
 
Method Summary
 void addIndexIntermediateFile(String fileName)
           
 void addMapWork(String path, String alias, Operator<?> work, PartitionDesc pd)
           
 void configureJobConf(org.apache.hadoop.mapred.JobConf job)
           
 void deriveExplainAttributes()
          Derive additional attributes to be rendered by EXPLAIN.
 ArrayList<String> getAliases()
           
 LinkedHashMap<String,PartitionDesc> getAliasToPartnInfo()
           
 LinkedHashMap<String,Operator<? extends OperatorDesc>> getAliasToWork()
           
 Set<Operator<?>> getAllRootOperators()
           
 Map<String,List<BucketingSortingCtx.BucketCol>> getBucketedColsByDirectory()
           
 boolean getHadoopSupportsSplittable()
           
 String getIndexIntermediateFile()
           
 String getInputformat()
           
 QBJoinTree getJoinTree()
           
 MapredLocalWork getMapLocalWork()
           
 Long getMaxSplitSize()
           
 Long getMinSplitSize()
           
 Long getMinSplitSizePerNode()
           
 Long getMinSplitSizePerRack()
           
 HashMap<String,SplitSample> getNameToSplitSample()
           
 Integer getNumMapTasks()
           
 LinkedHashMap<Operator<? extends OperatorDesc>,OpParseContext> getOpParseCtxMap()
           
 ArrayList<PartitionDesc> getPartitionDescs()
           
 ArrayList<String> getPaths()
           
 LinkedHashMap<String,ArrayList<String>> getPathToAliases()
           
 LinkedHashMap<String,PartitionDesc> getPathToPartitionInfo()
           
 int getSamplingType()
           
 String getSamplingTypeString()
           
 Map<String,Map<String,Integer>> getScratchColumnMap()
           
 Map<String,Map<Integer,String>> getScratchColumnVectorTypes()
           
 Map<String,List<BucketingSortingCtx.SortCol>> getSortedColsByDirectory()
           
 org.apache.hadoop.fs.Path getTmpHDFSPath()
           
 Map<String,ArrayList<String>> getTruncatedPathToAliases()
          This is used to display and verify the output of "Path -> Alias" in the test framework.
 boolean getVectorMode()
           
 String getVectorModeOn()
           
 ArrayList<Operator<?>> getWorks()
           
 void initialize()
           
 boolean isInputFormatSorted()
           
 boolean isMapperCannotSpanPartns()
           
 boolean isUseBucketizedHiveInputFormat()
           
 void mergeAliasedInput(String alias, String pathDir, PartitionDesc partitionInfo)
           
 void mergingInto(MapWork mapWork)
           
 void replaceRoots(Map<Operator<?>,Operator<?>> replacementMap)
           
 void resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf, org.apache.hadoop.fs.Path path, TableDesc tblDesc, ArrayList<String> aliases, PartitionDesc partDesc)
           
 void setAliasToPartnInfo(LinkedHashMap<String,PartitionDesc> aliasToPartnInfo)
           
 void setAliasToWork(LinkedHashMap<String,Operator<? extends OperatorDesc>> aliasToWork)
           
 void setHadoopSupportsSplittable(boolean hadoopSupportsSplittable)
           
 void setInputformat(String inputformat)
           
 void setInputFormatSorted(boolean inputFormatSorted)
           
 void setJoinTree(QBJoinTree joinTree)
           
 void setMapLocalWork(MapredLocalWork mapLocalWork)
           
 void setMapperCannotSpanPartns(boolean mapperCannotSpanPartns)
           
 void setMaxSplitSize(Long maxSplitSize)
           
 void setMinSplitSize(Long minSplitSize)
           
 void setMinSplitSizePerNode(Long minSplitSizePerNode)
           
 void setMinSplitSizePerRack(Long minSplitSizePerRack)
           
 void setNameToSplitSample(HashMap<String,SplitSample> nameToSplitSample)
           
 void setNumMapTasks(Integer numMapTasks)
           
 void setOpParseCtxMap(LinkedHashMap<Operator<? extends OperatorDesc>,OpParseContext> opParseCtxMap)
           
 void setPathToAliases(LinkedHashMap<String,ArrayList<String>> pathToAliases)
           
 void setPathToPartitionInfo(LinkedHashMap<String,PartitionDesc> pathToPartitionInfo)
           
 void setSamplingType(int samplingType)
           
 void setScratchColumnMap(Map<String,Map<String,Integer>> scratchColumnMap)
           
 void setScratchColumnVectorTypes(Map<String,Map<Integer,String>> scratchColumnVectorTypes)
           
 void setTmpHDFSPath(org.apache.hadoop.fs.Path tmpHDFSPath)
           
 void setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat)
           
 void setVectorMode(boolean vectorMode)
           
 
Methods inherited from class org.apache.hadoop.hive.ql.plan.BaseWork
addDummyOp, getAllOperators, getDummyOps, getName, isGatheringStats, setDummyOps, setGatheringStats, setName
 
Methods inherited from class org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc
clone, getOpTraits, getStatistics, setOpTraits, setStatistics
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

SAMPLING_ON_PREV_MR

public static final int SAMPLING_ON_PREV_MR
See Also:
Constant Field Values

SAMPLING_ON_START

public static final int SAMPLING_ON_START
See Also:
Constant Field Values
Constructor Detail

MapWork

public MapWork()

MapWork

public MapWork(String name)
Method Detail

getPathToAliases

public LinkedHashMap<String,ArrayList<String>> getPathToAliases()

setPathToAliases

public void setPathToAliases(LinkedHashMap<String,ArrayList<String>> pathToAliases)

getTruncatedPathToAliases

public Map<String,ArrayList<String>> getTruncatedPathToAliases()
This is used to display and verify the output of "Path -> Alias" in the test framework. QTestUtil masks "Path -> Alias" and makes verification impossible. To work around this, "Path -> Alias" is kept intact and a new display name is added which, by removing the prefix, is not masked by QTestUtil. Note: we would still be masking for intermediate directories.

Returns:

getPathToPartitionInfo

public LinkedHashMap<String,PartitionDesc> getPathToPartitionInfo()

setPathToPartitionInfo

public void setPathToPartitionInfo(LinkedHashMap<String,PartitionDesc> pathToPartitionInfo)

deriveExplainAttributes

public void deriveExplainAttributes()
Derive additional attributes to be rendered by EXPLAIN. TODO: this method is relied upon by custom input formats to set jobconf properties. This is madness? - This is Hive Storage Handlers!


getAliasToPartnInfo

public LinkedHashMap<String,PartitionDesc> getAliasToPartnInfo()
Returns:
the aliasToPartnInfo

setAliasToPartnInfo

public void setAliasToPartnInfo(LinkedHashMap<String,PartitionDesc> aliasToPartnInfo)
Parameters:
aliasToPartnInfo - the aliasToPartnInfo to set

getAliasToWork

public LinkedHashMap<String,Operator<? extends OperatorDesc>> getAliasToWork()

setAliasToWork

public void setAliasToWork(LinkedHashMap<String,Operator<? extends OperatorDesc>> aliasToWork)

getMapLocalWork

public MapredLocalWork getMapLocalWork()
Returns:
the mapredLocalWork

setMapLocalWork

public void setMapLocalWork(MapredLocalWork mapLocalWork)
Parameters:
mapLocalWork - the mapredLocalWork to set

getNameToSplitSample

public HashMap<String,SplitSample> getNameToSplitSample()

setNameToSplitSample

public void setNameToSplitSample(HashMap<String,SplitSample> nameToSplitSample)

getNumMapTasks

public Integer getNumMapTasks()

setNumMapTasks

public void setNumMapTasks(Integer numMapTasks)

addMapWork

public void addMapWork(String path,
                       String alias,
                       Operator<?> work,
                       PartitionDesc pd)

isInputFormatSorted

public boolean isInputFormatSorted()

setInputFormatSorted

public void setInputFormatSorted(boolean inputFormatSorted)

resolveDynamicPartitionStoredAsSubDirsMerge

public void resolveDynamicPartitionStoredAsSubDirsMerge(HiveConf conf,
                                                        org.apache.hadoop.fs.Path path,
                                                        TableDesc tblDesc,
                                                        ArrayList<String> aliases,
                                                        PartitionDesc partDesc)

getVectorModeOn

public String getVectorModeOn()

replaceRoots

public void replaceRoots(Map<Operator<?>,Operator<?>> replacementMap)
Specified by:
replaceRoots in class BaseWork

getAllRootOperators

public Set<Operator<?>> getAllRootOperators()
Specified by:
getAllRootOperators in class BaseWork

mergeAliasedInput

public void mergeAliasedInput(String alias,
                              String pathDir,
                              PartitionDesc partitionInfo)

initialize

public void initialize()

getMaxSplitSize

public Long getMaxSplitSize()

setMaxSplitSize

public void setMaxSplitSize(Long maxSplitSize)

getMinSplitSize

public Long getMinSplitSize()

setMinSplitSize

public void setMinSplitSize(Long minSplitSize)

getMinSplitSizePerNode

public Long getMinSplitSizePerNode()

setMinSplitSizePerNode

public void setMinSplitSizePerNode(Long minSplitSizePerNode)

getMinSplitSizePerRack

public Long getMinSplitSizePerRack()

setMinSplitSizePerRack

public void setMinSplitSizePerRack(Long minSplitSizePerRack)

getInputformat

public String getInputformat()

setInputformat

public void setInputformat(String inputformat)

isUseBucketizedHiveInputFormat

public boolean isUseBucketizedHiveInputFormat()

setUseBucketizedHiveInputFormat

public void setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat)

getJoinTree

public QBJoinTree getJoinTree()

setJoinTree

public void setJoinTree(QBJoinTree joinTree)

setMapperCannotSpanPartns

public void setMapperCannotSpanPartns(boolean mapperCannotSpanPartns)

isMapperCannotSpanPartns

public boolean isMapperCannotSpanPartns()

getHadoopSupportsSplittable

public boolean getHadoopSupportsSplittable()

setHadoopSupportsSplittable

public void setHadoopSupportsSplittable(boolean hadoopSupportsSplittable)

getIndexIntermediateFile

public String getIndexIntermediateFile()

getAliases

public ArrayList<String> getAliases()

getWorks

public ArrayList<Operator<?>> getWorks()

getPaths

public ArrayList<String> getPaths()

getPartitionDescs

public ArrayList<PartitionDesc> getPartitionDescs()

getOpParseCtxMap

public LinkedHashMap<Operator<? extends OperatorDesc>,OpParseContext> getOpParseCtxMap()

setOpParseCtxMap

public void setOpParseCtxMap(LinkedHashMap<Operator<? extends OperatorDesc>,OpParseContext> opParseCtxMap)

getTmpHDFSPath

public org.apache.hadoop.fs.Path getTmpHDFSPath()

setTmpHDFSPath

public void setTmpHDFSPath(org.apache.hadoop.fs.Path tmpHDFSPath)

mergingInto

public void mergingInto(MapWork mapWork)

getBucketedColsByDirectory

public Map<String,List<BucketingSortingCtx.BucketCol>> getBucketedColsByDirectory()

getSortedColsByDirectory

public Map<String,List<BucketingSortingCtx.SortCol>> getSortedColsByDirectory()

addIndexIntermediateFile

public void addIndexIntermediateFile(String fileName)

getSamplingType

public int getSamplingType()

setSamplingType

public void setSamplingType(int samplingType)

getSamplingTypeString

public String getSamplingTypeString()

configureJobConf

public void configureJobConf(org.apache.hadoop.mapred.JobConf job)
Specified by:
configureJobConf in class BaseWork

getScratchColumnVectorTypes

public Map<String,Map<Integer,String>> getScratchColumnVectorTypes()

setScratchColumnVectorTypes

public void setScratchColumnVectorTypes(Map<String,Map<Integer,String>> scratchColumnVectorTypes)

getScratchColumnMap

public Map<String,Map<String,Integer>> getScratchColumnMap()

setScratchColumnMap

public void setScratchColumnMap(Map<String,Map<String,Integer>> scratchColumnMap)

getVectorMode

public boolean getVectorMode()

setVectorMode

public void setVectorMode(boolean vectorMode)
Overrides:
setVectorMode in class AbstractOperatorDesc


Copyright © 2014 The Apache Software Foundation. All rights reserved.