org.apache.hadoop.hive.ql.stats
Class StatsUtils

java.lang.Object
  extended by org.apache.hadoop.hive.ql.stats.StatsUtils

public class StatsUtils
extends Object


Constructor Summary
StatsUtils()
           
 
Method Summary
static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, Table table, TableScanOperator tableScanOperator)
          Collect table, partition and column level statistics
static int estimateRowSizeFromSchema(HiveConf conf, List<ColumnInfo> schema, List<String> neededColumns)
           
static long getAvgColLenOfFixedLengthTypes(String colType)
          Get size of fixed length primitives
static long getAvgColLenOfVariableLengthTypes(HiveConf conf, ObjectInspector oi, String colType)
          Get the raw data size of variable length data types
static List<Long> getBasicStatForPartitions(Table table, List<Partition> parts, String statType)
          Get basic stats of partitions
static long getBasicStatForTable(Table table, String statType)
          Get basic stats of table
static ColStatistics getColStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj cso, String tabName, String colName)
          Convert ColumnStatisticsObj to ColStatistics
static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statistics parentStats, ExprNodeDesc end)
          Get column statistics from expression nodes
static List<ColStatistics> getColStatisticsFromExprMap(HiveConf conf, Statistics parentStats, Map<String,ExprNodeDesc> colExprMap, RowSchema rowSchema)
          Get column statistics from parent statistics.
static long getDataSizeFromColumnStats(long numRows, List<ColStatistics> colStats)
          Compute raw data size from column statistics
static List<Long> getFileSizeForPartitions(HiveConf conf, List<Partition> parts)
          Find the bytes on disk occupied by a list of partitions
static long getFileSizeForTable(HiveConf conf, Table table)
          Find the bytes on disk occupied by a table
static List<String> getFullQualifedColNameFromExprs(List<ExprNodeDesc> keyExprs, Map<String,ExprNodeDesc> map)
          Try to get fully qualified column name from expression node
static String getFullyQualifiedColumnName(String tabName, String colName)
          Returns fully qualified name of column
static String getFullyQualifiedColumnName(String dbName, String tabName, String colName)
          Returns fully qualified name of column
static String getFullyQualifiedColumnName(String dbName, String tabName, String partName, String colName)
          Returns fully qualified name of column
static long getNumRows(Table table)
          Get number of rows of a given table
static Map<String,List<ColStatistics>> getPartColumnStats(Table table, List<ColumnInfo> schema, List<String> partNames, List<String> neededColumns)
          Get partition level column statistics from metastore for needed columns
static long getRawDataSize(Table table)
          Get raw data size of a given table
static long getSizeOfComplexTypes(HiveConf conf, ObjectInspector oi)
          Get the size of complex data types
static long getSizeOfMap(StandardConstantMapObjectInspector scmoi)
          Estimate the size of map object
static long getSizeOfPrimitiveTypeArraysFromType(String colType, int length)
          Get the size of arrays of primitive types
static long getSumIgnoreNegatives(List<Long> vals)
          Get sum of all values in the list that are >0
static List<ColStatistics> getTableColumnStats(Table table, List<ColumnInfo> schema, List<String> neededColumns)
          Get table level column statistics from metastore for needed columns
static long getTotalSize(Table table)
          Get total size of a given table
static long getWritableSize(ObjectInspector oi, Object value)
          Get size of primitive data types based on their respective writable object inspector
static String stripPrefixFromColumnName(String colName)
          Remove KEY/VALUE prefix from column name
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

StatsUtils

public StatsUtils()
Method Detail

collectStatistics

public static Statistics collectStatistics(HiveConf conf,
                                           PrunedPartitionList partList,
                                           Table table,
                                           TableScanOperator tableScanOperator)
Collect table, partition and column level statistics

Parameters:
conf - - hive configuration
partList - - partition list
table - - table
tableScanOperator - - table scan operator
Returns:
statistics object
Throws:
HiveException

estimateRowSizeFromSchema

public static int estimateRowSizeFromSchema(HiveConf conf,
                                            List<ColumnInfo> schema,
                                            List<String> neededColumns)

getFileSizeForTable

public static long getFileSizeForTable(HiveConf conf,
                                       Table table)
Find the bytes on disk occupied by a table

Parameters:
conf - - hive conf
table - - table
Returns:
size on disk

getFileSizeForPartitions

public static List<Long> getFileSizeForPartitions(HiveConf conf,
                                                  List<Partition> parts)
Find the bytes on disk occupied by a list of partitions

Parameters:
conf - - hive conf
parts - - partition list
Returns:
sizes of partitions

getSumIgnoreNegatives

public static long getSumIgnoreNegatives(List<Long> vals)
Get sum of all values in the list that are >0

Parameters:
vals - - list of values
Returns:
sum

getColStatistics

public static ColStatistics getColStatistics(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj cso,
                                             String tabName,
                                             String colName)
Convert ColumnStatisticsObj to ColStatistics

Parameters:
cso - - ColumnStatisticsObj
tabName - - table name
colName - - column name
Returns:
ColStatistics

getTableColumnStats

public static List<ColStatistics> getTableColumnStats(Table table,
                                                      List<ColumnInfo> schema,
                                                      List<String> neededColumns)
Get table level column statistics from metastore for needed columns

Parameters:
table - - table
schema - - output schema
neededColumns - - list of needed columns
Returns:
column statistics

getPartColumnStats

public static Map<String,List<ColStatistics>> getPartColumnStats(Table table,
                                                                 List<ColumnInfo> schema,
                                                                 List<String> partNames,
                                                                 List<String> neededColumns)
Get partition level column statistics from metastore for needed columns

Parameters:
table - - table
schema - - output schema
partNames - - list of partition names
neededColumns - - list of needed columns
Returns:
column statistics

getAvgColLenOfVariableLengthTypes

public static long getAvgColLenOfVariableLengthTypes(HiveConf conf,
                                                     ObjectInspector oi,
                                                     String colType)
Get the raw data size of variable length data types

Parameters:
conf - - hive conf
oi - - object inspector
colType - - column type
Returns:
raw data size

getSizeOfComplexTypes

public static long getSizeOfComplexTypes(HiveConf conf,
                                         ObjectInspector oi)
Get the size of complex data types

Parameters:
conf - - hive conf
oi - - object inspector
Returns:
raw data size

getAvgColLenOfFixedLengthTypes

public static long getAvgColLenOfFixedLengthTypes(String colType)
Get size of fixed length primitives

Parameters:
colType - - column type
Returns:
raw data size

getSizeOfPrimitiveTypeArraysFromType

public static long getSizeOfPrimitiveTypeArraysFromType(String colType,
                                                        int length)
Get the size of arrays of primitive types

Parameters:
colType - - column type
length - - array length
Returns:
raw data size

getSizeOfMap

public static long getSizeOfMap(StandardConstantMapObjectInspector scmoi)
Estimate the size of map object

Parameters:
scmoi - - object inspector
Returns:
size of map

getWritableSize

public static long getWritableSize(ObjectInspector oi,
                                   Object value)
Get size of primitive data types based on their respective writable object inspector

Parameters:
oi - - object inspector
value - - value
Returns:
raw data size

getColStatisticsFromExprMap

public static List<ColStatistics> getColStatisticsFromExprMap(HiveConf conf,
                                                              Statistics parentStats,
                                                              Map<String,ExprNodeDesc> colExprMap,
                                                              RowSchema rowSchema)
Get column statistics from parent statistics.

Parameters:
conf - - hive conf
parentStats - - parent statistics
colExprMap - - column expression map
rowSchema - - row schema
Returns:
column statistics

getColStatisticsFromExpression

public static ColStatistics getColStatisticsFromExpression(HiveConf conf,
                                                           Statistics parentStats,
                                                           ExprNodeDesc end)
Get column statistics from expression nodes

Parameters:
conf - - hive conf
parentStats - - parent statistics
end - - expression nodes
Returns:
column statistics

getNumRows

public static long getNumRows(Table table)
Get number of rows of a given table

Returns:
number of rows

getRawDataSize

public static long getRawDataSize(Table table)
Get raw data size of a given table

Returns:
raw data size

getTotalSize

public static long getTotalSize(Table table)
Get total size of a given table

Returns:
total size

getBasicStatForTable

public static long getBasicStatForTable(Table table,
                                        String statType)
Get basic stats of table

Parameters:
table - - table
statType - - type of stats
Returns:
value of stats

getBasicStatForPartitions

public static List<Long> getBasicStatForPartitions(Table table,
                                                   List<Partition> parts,
                                                   String statType)
Get basic stats of partitions

Parameters:
table - - table
parts - - partitions
statType - - type of stats
Returns:
value of stats

getDataSizeFromColumnStats

public static long getDataSizeFromColumnStats(long numRows,
                                              List<ColStatistics> colStats)
Compute raw data size from column statistics

Parameters:
numRows - - number of rows
colStats - - column statistics
Returns:
raw data size

stripPrefixFromColumnName

public static String stripPrefixFromColumnName(String colName)
Remove KEY/VALUE prefix from column name

Parameters:
colName - - column name
Returns:
column name

getFullyQualifiedColumnName

public static String getFullyQualifiedColumnName(String tabName,
                                                 String colName)
Returns fully qualified name of column

Parameters:
tabName -
colName -
Returns:

getFullyQualifiedColumnName

public static String getFullyQualifiedColumnName(String dbName,
                                                 String tabName,
                                                 String colName)
Returns fully qualified name of column

Parameters:
dbName -
tabName -
colName -
Returns:

getFullyQualifiedColumnName

public static String getFullyQualifiedColumnName(String dbName,
                                                 String tabName,
                                                 String partName,
                                                 String colName)
Returns fully qualified name of column

Parameters:
dbName -
tabName -
partName -
colName -
Returns:

getFullQualifedColNameFromExprs

public static List<String> getFullQualifedColNameFromExprs(List<ExprNodeDesc> keyExprs,
                                                           Map<String,ExprNodeDesc> map)
Try to get fully qualified column name from expression node

Parameters:
keyExprs - - expression nodes
map - - column expression map
Returns:
list of fully qualified names


Copyright © 2014 The Apache Software Foundation. All rights reserved.