org.apache.hadoop.hive.ql.plan
Class PlanUtils

java.lang.Object
  extended by org.apache.hadoop.hive.ql.plan.PlanUtils

public final class PlanUtils
extends Object

PlanUtils.


Nested Class Summary
static class PlanUtils.ExpressionTypes
          ExpressionTypes.
 
Field Summary
protected static org.apache.commons.logging.Log LOG
           
 
Method Summary
static void configureInputJobPropertiesForStorageHandler(TableDesc tableDesc)
          Loads the storage handler (if one exists) for the given table and invokes HiveStorageHandler.configureInputJobProperties(TableDesc, java.util.Map).
static void configureOutputJobPropertiesForStorageHandler(TableDesc tableDesc)
          Loads the storage handler (if one exists) for the given table and invokes HiveStorageHandler.configureOutputJobProperties(TableDesc, java.util.Map).
static long getCountForMapJoinDumpFilePrefix()
           
static TableDesc getDefaultQueryOutputTableDesc(String cols, String colTypes, String fileFormat)
           
static TableDesc getDefaultTableDesc(String separatorCode)
          Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode.
static TableDesc getDefaultTableDesc(String separatorCode, String columns)
          Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode and column names (comma separated string).
static TableDesc getDefaultTableDesc(String separatorCode, String columns, boolean lastColumnTakesRestOfTheLine)
          Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode and column names (comma separated string), and whether the last column should take the rest of the line.
static TableDesc getDefaultTableDesc(String separatorCode, String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine)
          Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode, column names (comma separated string), column types, and whether the last column should take the rest of the line.
static List<FieldSchema> getFieldSchemasFromColumnInfo(ArrayList<ColumnInfo> cols, String fieldPrefix)
          Convert the ColumnInfo to FieldSchema.
static List<FieldSchema> getFieldSchemasFromColumnList(List<ExprNodeDesc> cols, List<String> outputColumnNames, int start, String fieldPrefix)
          Convert the ColumnList to FieldSchema list.
static List<FieldSchema> getFieldSchemasFromColumnList(List<ExprNodeDesc> cols, String fieldPrefix)
          Convert the ColumnList to FieldSchema list.
static List<FieldSchema> getFieldSchemasFromColumnListWithLength(List<ExprNodeDesc> cols, List<List<Integer>> distinctColIndices, List<String> outputColumnNames, int length, String fieldPrefix)
          Convert the ColumnList to FieldSchema list.
static List<FieldSchema> getFieldSchemasFromRowSchema(RowSchema row, String fieldPrefix)
          Convert the RowSchema to FieldSchema list.
static TableDesc getIntermediateFileTableDesc(List<FieldSchema> fieldSchemas)
          Generate the table descriptor for intermediate files.
static TableDesc getMapJoinKeyTableDesc(List<FieldSchema> fieldSchemas)
          Generate the table descriptor for Map-side join key.
static TableDesc getMapJoinValueTableDesc(List<FieldSchema> fieldSchemas)
          Generate the table descriptor for Map-side join value.
static MapredWork getMapRedWork()
           
static TableDesc getReduceKeyTableDesc(List<FieldSchema> fieldSchemas, String order)
          Generate the table descriptor for reduce key.
static ReduceSinkDesc getReduceSinkDesc(ArrayList<ExprNodeDesc> keyCols, ArrayList<ExprNodeDesc> valueCols, List<String> outputColumnNames, boolean includeKeyCols, int tag, ArrayList<ExprNodeDesc> partitionCols, String order, int numReducers)
          Create the reduce sink descriptor.
static ReduceSinkDesc getReduceSinkDesc(ArrayList<ExprNodeDesc> keyCols, ArrayList<ExprNodeDesc> valueCols, List<String> outputColumnNames, boolean includeKey, int tag, int numPartitionFields, int numReducers)
          Create the reduce sink descriptor.
static ReduceSinkDesc getReduceSinkDesc(ArrayList<ExprNodeDesc> keyCols, int numKeys, ArrayList<ExprNodeDesc> valueCols, List<List<Integer>> distinctColIndices, List<String> outputKeyColumnNames, List<String> outputValueColumnNames, boolean includeKeyCols, int tag, ArrayList<ExprNodeDesc> partitionCols, String order, int numReducers)
          Create the reduce sink descriptor.
static ReduceSinkDesc getReduceSinkDesc(ArrayList<ExprNodeDesc> keyCols, int numKeys, ArrayList<ExprNodeDesc> valueCols, List<List<Integer>> distinctColIndices, List<String> outputKeyColumnNames, List<String> outputValueColumnNames, boolean includeKey, int tag, int numPartitionFields, int numReducers)
          Create the reduce sink descriptor.
static TableDesc getReduceValueTableDesc(List<FieldSchema> fieldSchemas)
          Generate the table descriptor for reduce value.
static TableDesc getTableDesc(Class<? extends Deserializer> serdeClass, String separatorCode, String columns)
          Generate the table descriptor of given serde with the separatorCode and column names (comma separated string).
static TableDesc getTableDesc(Class<? extends Deserializer> serdeClass, String separatorCode, String columns, boolean lastColumnTakesRestOfTheLine)
          Generate the table descriptor of the serde specified with the separatorCode and column names (comma separated string), and whether the last column should take the rest of the line.
static TableDesc getTableDesc(Class<? extends Deserializer> serdeClass, String separatorCode, String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine)
           
static TableDesc getTableDesc(Class<? extends Deserializer> serdeClass, String separatorCode, String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine, boolean useDelimitedJSON)
           
static TableDesc getTableDesc(Class<? extends Deserializer> serdeClass, String separatorCode, String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine, boolean useDelimitedJSON, String fileFormat)
           
static TableDesc getTableDesc(CreateTableDesc crtTblDesc, String cols, String colTypes)
          Generate a table descriptor from a createTableDesc.
static List<FieldSchema> sortFieldSchemas(List<FieldSchema> schema)
           
static String stripQuotes(String val)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

LOG

protected static final org.apache.commons.logging.Log LOG
Method Detail

getCountForMapJoinDumpFilePrefix

public static long getCountForMapJoinDumpFilePrefix()

getMapRedWork

public static MapredWork getMapRedWork()

getDefaultTableDesc

public static TableDesc getDefaultTableDesc(String separatorCode,
                                            String columns)
Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode and column names (comma separated string).


getTableDesc

public static TableDesc getTableDesc(Class<? extends Deserializer> serdeClass,
                                     String separatorCode,
                                     String columns)
Generate the table descriptor of given serde with the separatorCode and column names (comma separated string).


getDefaultTableDesc

public static TableDesc getDefaultTableDesc(String separatorCode,
                                            String columns,
                                            boolean lastColumnTakesRestOfTheLine)
Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode and column names (comma separated string), and whether the last column should take the rest of the line.


getTableDesc

public static TableDesc getTableDesc(Class<? extends Deserializer> serdeClass,
                                     String separatorCode,
                                     String columns,
                                     boolean lastColumnTakesRestOfTheLine)
Generate the table descriptor of the serde specified with the separatorCode and column names (comma separated string), and whether the last column should take the rest of the line.


getDefaultTableDesc

public static TableDesc getDefaultTableDesc(String separatorCode,
                                            String columns,
                                            String columnTypes,
                                            boolean lastColumnTakesRestOfTheLine)
Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode, column names (comma separated string), column types, and whether the last column should take the rest of the line.


getTableDesc

public static TableDesc getTableDesc(Class<? extends Deserializer> serdeClass,
                                     String separatorCode,
                                     String columns,
                                     String columnTypes,
                                     boolean lastColumnTakesRestOfTheLine)

getTableDesc

public static TableDesc getTableDesc(Class<? extends Deserializer> serdeClass,
                                     String separatorCode,
                                     String columns,
                                     String columnTypes,
                                     boolean lastColumnTakesRestOfTheLine,
                                     boolean useDelimitedJSON)

getTableDesc

public static TableDesc getTableDesc(Class<? extends Deserializer> serdeClass,
                                     String separatorCode,
                                     String columns,
                                     String columnTypes,
                                     boolean lastColumnTakesRestOfTheLine,
                                     boolean useDelimitedJSON,
                                     String fileFormat)

getDefaultQueryOutputTableDesc

public static TableDesc getDefaultQueryOutputTableDesc(String cols,
                                                       String colTypes,
                                                       String fileFormat)

getTableDesc

public static TableDesc getTableDesc(CreateTableDesc crtTblDesc,
                                     String cols,
                                     String colTypes)
Generate a table descriptor from a createTableDesc.


getDefaultTableDesc

public static TableDesc getDefaultTableDesc(String separatorCode)
Generate the table descriptor of MetadataTypedColumnsetSerDe with the separatorCode. MetadataTypedColumnsetSerDe is used because LazySimpleSerDe does not support a table with a single column "col" with type "array".


getReduceKeyTableDesc

public static TableDesc getReduceKeyTableDesc(List<FieldSchema> fieldSchemas,
                                              String order)
Generate the table descriptor for reduce key.


getMapJoinKeyTableDesc

public static TableDesc getMapJoinKeyTableDesc(List<FieldSchema> fieldSchemas)
Generate the table descriptor for Map-side join key.


getMapJoinValueTableDesc

public static TableDesc getMapJoinValueTableDesc(List<FieldSchema> fieldSchemas)
Generate the table descriptor for Map-side join value.


getIntermediateFileTableDesc

public static TableDesc getIntermediateFileTableDesc(List<FieldSchema> fieldSchemas)
Generate the table descriptor for intermediate files.


getReduceValueTableDesc

public static TableDesc getReduceValueTableDesc(List<FieldSchema> fieldSchemas)
Generate the table descriptor for reduce value.


getFieldSchemasFromColumnListWithLength

public static List<FieldSchema> getFieldSchemasFromColumnListWithLength(List<ExprNodeDesc> cols,
                                                                        List<List<Integer>> distinctColIndices,
                                                                        List<String> outputColumnNames,
                                                                        int length,
                                                                        String fieldPrefix)
Convert the ColumnList to FieldSchema list. Adds uniontype for distinctColIndices.


getFieldSchemasFromColumnList

public static List<FieldSchema> getFieldSchemasFromColumnList(List<ExprNodeDesc> cols,
                                                              List<String> outputColumnNames,
                                                              int start,
                                                              String fieldPrefix)
Convert the ColumnList to FieldSchema list.


getFieldSchemasFromColumnList

public static List<FieldSchema> getFieldSchemasFromColumnList(List<ExprNodeDesc> cols,
                                                              String fieldPrefix)
Convert the ColumnList to FieldSchema list.


getFieldSchemasFromRowSchema

public static List<FieldSchema> getFieldSchemasFromRowSchema(RowSchema row,
                                                             String fieldPrefix)
Convert the RowSchema to FieldSchema list.


getFieldSchemasFromColumnInfo

public static List<FieldSchema> getFieldSchemasFromColumnInfo(ArrayList<ColumnInfo> cols,
                                                              String fieldPrefix)
Convert the ColumnInfo to FieldSchema.


sortFieldSchemas

public static List<FieldSchema> sortFieldSchemas(List<FieldSchema> schema)

getReduceSinkDesc

public static ReduceSinkDesc getReduceSinkDesc(ArrayList<ExprNodeDesc> keyCols,
                                               ArrayList<ExprNodeDesc> valueCols,
                                               List<String> outputColumnNames,
                                               boolean includeKeyCols,
                                               int tag,
                                               ArrayList<ExprNodeDesc> partitionCols,
                                               String order,
                                               int numReducers)
Create the reduce sink descriptor.

Parameters:
keyCols - The columns to be stored in the key
valueCols - The columns to be stored in the value
outputColumnNames - The output column names
tag - The tag for this reducesink
partitionCols - The columns for partitioning.
numReducers - The number of reducers, set to -1 for automatic inference based on input data size.
Returns:
The reduceSinkDesc object.

getReduceSinkDesc

public static ReduceSinkDesc getReduceSinkDesc(ArrayList<ExprNodeDesc> keyCols,
                                               int numKeys,
                                               ArrayList<ExprNodeDesc> valueCols,
                                               List<List<Integer>> distinctColIndices,
                                               List<String> outputKeyColumnNames,
                                               List<String> outputValueColumnNames,
                                               boolean includeKeyCols,
                                               int tag,
                                               ArrayList<ExprNodeDesc> partitionCols,
                                               String order,
                                               int numReducers)
Create the reduce sink descriptor.

Parameters:
keyCols - The columns to be stored in the key
numKeys - The number of distribution keys. Usually equal to the number of group-by keys.
valueCols - The columns to be stored in the value
distinctColIndices - column indices for distinct aggregate parameters
outputKeyColumnNames - The output key column names
outputValueColumnNames - The output value column names
tag - The tag for this reducesink
partitionCols - The columns for partitioning.
numReducers - The number of reducers, set to -1 for automatic inference based on input data size.
Returns:
The reduceSinkDesc object.

getReduceSinkDesc

public static ReduceSinkDesc getReduceSinkDesc(ArrayList<ExprNodeDesc> keyCols,
                                               ArrayList<ExprNodeDesc> valueCols,
                                               List<String> outputColumnNames,
                                               boolean includeKey,
                                               int tag,
                                               int numPartitionFields,
                                               int numReducers)
                                        throws SemanticException
Create the reduce sink descriptor.

Parameters:
keyCols - The columns to be stored in the key
valueCols - The columns to be stored in the value
outputColumnNames - The output column names
tag - The tag for this reducesink
numPartitionFields - The first numPartitionFields of keyCols will be partition columns. If numPartitionFields=-1, then partition randomly.
numReducers - The number of reducers, set to -1 for automatic inference based on input data size.
Returns:
The reduceSinkDesc object.
Throws:
SemanticException

getReduceSinkDesc

public static ReduceSinkDesc getReduceSinkDesc(ArrayList<ExprNodeDesc> keyCols,
                                               int numKeys,
                                               ArrayList<ExprNodeDesc> valueCols,
                                               List<List<Integer>> distinctColIndices,
                                               List<String> outputKeyColumnNames,
                                               List<String> outputValueColumnNames,
                                               boolean includeKey,
                                               int tag,
                                               int numPartitionFields,
                                               int numReducers)
                                        throws SemanticException
Create the reduce sink descriptor.

Parameters:
keyCols - The columns to be stored in the key
numKeys - The number of distribution keys. Usually equal to the number of group-by keys.
valueCols - The columns to be stored in the value
distinctColIndices - column indices for distinct aggregates
outputKeyColumnNames - The output key column names
outputValueColumnNames - The output value column names
tag - The tag for this reducesink
numPartitionFields - The first numPartitionFields of keyCols will be partition columns. If numPartitionFields=-1, then partition randomly.
numReducers - The number of reducers, set to -1 for automatic inference based on input data size.
Returns:
The reduceSinkDesc object.
Throws:
SemanticException

configureInputJobPropertiesForStorageHandler

public static void configureInputJobPropertiesForStorageHandler(TableDesc tableDesc)
Loads the storage handler (if one exists) for the given table and invokes HiveStorageHandler.configureInputJobProperties(TableDesc, java.util.Map).

Parameters:
tableDesc - table descriptor

configureOutputJobPropertiesForStorageHandler

public static void configureOutputJobPropertiesForStorageHandler(TableDesc tableDesc)
Loads the storage handler (if one exists) for the given table and invokes HiveStorageHandler.configureOutputJobProperties(TableDesc, java.util.Map).

Parameters:
tableDesc - table descriptor

stripQuotes

public static String stripQuotes(String val)


Copyright © 2013 The Apache Software Foundation