org.apache.hadoop.hive.ql.exec
Class Utilities

java.lang.Object
  extended by org.apache.hadoop.hive.ql.exec.Utilities

public final class Utilities
extends Object

Utilities.


Nested Class Summary
static class Utilities.CollectionPersistenceDelegate
           
static class Utilities.EnumDelegate
          Java 1.5 workaround.
static class Utilities.ListDelegate
           
static class Utilities.MapDelegate
           
static class Utilities.ReduceField
          ReduceField.
static class Utilities.SetDelegate
           
static class Utilities.SQLCommand<T>
           
static class Utilities.StreamPrinter
          StreamPrinter.
static class Utilities.StreamStatus
          StreamStatus.
static class Utilities.Tuple<T,V>
          Tuple.
 
Field Summary
static int carriageReturnCode
           
static int ctrlaCode
           
static TableDesc defaultTd
           
static Object getInputSummaryLock
           
static String HADOOP_LOCAL_FS
The objects in the reducer are composed of these top-level fields.
static String INDENT
           
static int newLineCode
           
static String NSTR
           
static String nullStringOutput
           
static String nullStringStorage
           
static Random randGen
           
static char sqlEscapeChar
           
static String suffix
           
static int tabCode
           
 
Method Summary
static String abbreviate(String str, int max)
          convert "From src insert blah blah" to "From src insert ...
static void addMapWork(MapredWork mr, Table tbl, String alias, Operator<?> work)
           
static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths)
          Add new elements to the classpath.
static String checkJDOPushDown(Table tab, ExprNodeDesc expr)
          Check if the partition pruning expression can be pushed down to JDO filtering.
static void clearMapRedWork(org.apache.hadoop.conf.Configuration job)
           
static Connection connectWithRetry(String connectionString, int waitWindow, int maxRetries)
          Retry connecting to a database with random backoff (same as the one implemented in HDFS-767).
static boolean contentsEqual(InputStream is1, InputStream is2, boolean ignoreWhitespace)
           
static void copyTableJobPropertiesToConf(TableDesc tbl, org.apache.hadoop.mapred.JobConf job)
          Copies the storage handler properties configured for a table descriptor to a runtime job configuration.
static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc, OutputStream out)
          Convert an output stream to a compressed output stream based on codecs and compression options specified in the Job Configuration.
static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc, OutputStream out, boolean isCompressed)
          Convert an output stream to a compressed output stream based on the codecs specified in the Job Configuration.
static RCFile.Writer createRCFileWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, boolean isCompressed)
          Create an RCFile output stream based on the job configuration. Uses the user-supplied compression flag (rather than obtaining it from the Job Configuration).
static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, Class<?> keyClass, Class<?> valClass)
          Create a sequencefile output stream based on job configuration.
static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc, org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file, Class<?> keyClass, Class<?> valClass, boolean isCompressed)
          Create a sequencefile output stream based on the job configuration. Uses the user-supplied compression flag (rather than obtaining it from the Job Configuration).
static ExprNodeDesc deserializeExpression(String s, org.apache.hadoop.conf.Configuration conf)
           
static MapredLocalWork deserializeMapRedLocalWork(InputStream in, org.apache.hadoop.conf.Configuration conf)
           
static MapredWork deserializeMapRedWork(InputStream in, org.apache.hadoop.conf.Configuration conf)
           
static QueryPlan deserializeQueryPlan(InputStream in, org.apache.hadoop.conf.Configuration conf)
          Deserialize the whole query plan.
static String escapeSqlLike(String key)
          Escape the '_' and '%' characters, as well as the escape character itself, inside the string key.
static <T> T executeWithRetry(Utilities.SQLCommand<T> cmd, PreparedStatement stmt, int baseWindow, int maxRetries)
          Retry SQL execution with random backoff (same as the one implemented in HDFS-767).
static String formatBinaryString(byte[] array, int start, int length)
           
static String formatMsecToStr(long msec)
          Format a number of milliseconds as a string.
static String generateFileName(Byte tag, String bigBucketFileName)
           
static String generatePath(org.apache.hadoop.fs.Path baseURI, String filename)
           
static String generatePath(String baseURI, String dumpFilePrefix, Byte tag, String bigBucketFileName)
           
static String generateTarFileName(String name)
           
static String generateTarURI(org.apache.hadoop.fs.Path baseURI, String filename)
           
static String generateTarURI(String baseURI, String filename)
           
static String generateTmpURI(String baseURI, String id)
           
static Class getBuiltinUtilsClass()
           
static List<String> getColumnNames(Properties props)
           
static List<String> getColumnNamesFromFieldSchema(List<FieldSchema> partCols)
           
static List<String> getColumnNamesFromSortCols(List<Order> sortCols)
           
static List<String> getColumnTypes(Properties props)
           
static int getDefaultNotificationInterval(org.apache.hadoop.conf.Configuration hconf)
          Gets the default notification interval to send progress updates to the tracker.
static List<String> getFieldSchemaString(List<FieldSchema> fl)
           
static String getFileExtension(org.apache.hadoop.mapred.JobConf jc, boolean isCompressed)
          Deprecated. Use getFileExtension(JobConf, boolean, HiveOutputFormat)
static String getFileExtension(org.apache.hadoop.mapred.JobConf jc, boolean isCompressed, HiveOutputFormat<?,?> hiveOutputFormat)
          Based on the compression option, output format, and configured output codec, get the extension for the output file.
static String getFileNameFromDirName(String dirName)
           
static org.apache.hadoop.fs.FileStatus[] getFileStatusRecurse(org.apache.hadoop.fs.Path path, int level, org.apache.hadoop.fs.FileSystem fs)
          Get all file statuses from a root path, recursing down a given number of directory levels.
static List<LinkedHashMap<String,String>> getFullDPSpecs(org.apache.hadoop.conf.Configuration conf, DynamicPartitionCtx dpCtx)
          Construct a list of full partition specs from the Dynamic Partition Context and the directory names corresponding to these dynamic partitions.
static String getHiveJobID(org.apache.hadoop.conf.Configuration job)
           
static org.apache.hadoop.fs.ContentSummary getInputSummary(Context ctx, MapredWork work, org.apache.hadoop.fs.PathFilter filter)
          Calculate the total size of input files.
static MapredWork getMapRedWork(org.apache.hadoop.conf.Configuration job)
           
static List<ExecDriver> getMRTasks(List<Task<? extends Serializable>> tasks)
           
static String getNameMessage(Exception e)
           
static String getOpTreeSkel(Operator<?> op)
           
static PartitionDesc getPartitionDesc(Partition part)
           
static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part)
           
static String getPrefixedTaskIdFromFilename(String filename)
          Get the part-spec + task id from the filename.
static long getRandomWaitTime(int baseWindow, int failures, Random r)
          Introduce a random factor into the wait time before another retry.
static String getResourceFiles(org.apache.hadoop.conf.Configuration conf, SessionState.ResourceType t)
           
static StatsPublisher getStatsPublisher(org.apache.hadoop.mapred.JobConf jc)
           
static TableDesc getTableDesc(String cols, String colTypes)
           
static TableDesc getTableDesc(Table tbl)
           
static String getTaskId(org.apache.hadoop.conf.Configuration hconf)
          Gets the task id if we are running as a Hadoop job.
static String getTaskIdFromFilename(String filename)
          Get the task id from the filename.
static boolean isEmptyPath(org.apache.hadoop.mapred.JobConf job, org.apache.hadoop.fs.Path dirPath)
           
static boolean isEmptyPath(org.apache.hadoop.mapred.JobConf job, String dirPath, Context ctx)
           
static boolean isTempPath(org.apache.hadoop.fs.FileStatus file)
          Detect if the supplied file is a temporary path.
static ArrayList makeList(Object... olist)
           
static HashMap makeMap(Object... olist)
           
static Properties makeProperties(String... olist)
           
static List<String> mergeUniqElems(List<String> src, List<String> dest)
           
static void mvFileToFinalPath(String specPath, org.apache.hadoop.conf.Configuration hconf, boolean success, org.apache.commons.logging.Log log, DynamicPartitionCtx dpCtx, FileSinkDesc conf, org.apache.hadoop.mapred.Reporter reporter)
           
static String now()
           
static PreparedStatement prepareWithRetry(Connection conn, String stmt, int waitWindow, int maxRetries)
          Retry preparing a SQL statement with random backoff (same as the one implemented in HDFS-767).
static Utilities.StreamStatus readColumn(DataInput in, OutputStream out)
           
static String realFile(String newFile, org.apache.hadoop.conf.Configuration conf)
          Shamelessly cloned from GenericOptionsParser.
static void removeFromClassPath(String[] pathsToRemove)
          Remove elements from the classpath.
static HashMap<String,org.apache.hadoop.fs.FileStatus> removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileStatus[] items, org.apache.hadoop.fs.FileSystem fs)
           
static void removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path)
          Remove all temporary files and duplicate (double-committed) files from a given directory.
static ArrayList<String> removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path, DynamicPartitionCtx dpCtx)
          Remove all temporary files and duplicate (double-committed) files from a given directory.
static void rename(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dst)
          Rename src to dst, or, if dst already exists, move the files in src into dst.
static void renameOrMoveFiles(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path src, org.apache.hadoop.fs.Path dst)
          Rename src to dst, or, if dst already exists, move the files in src into dst.
static String replaceTaskIdFromFilename(String filename, int bucketNum)
          Replace the task id in the filename.
static String replaceTaskIdFromFilename(String filename, String fileId)
           
static void reworkMapRedWork(Task<? extends Serializable> task, boolean reworkMapredWork, HiveConf conf)
          The check here is not entirely clean.
static String serializeExpression(ExprNodeDesc expr)
           
static void serializeMapRedLocalWork(MapredLocalWork w, OutputStream out)
          Serialize the mapredLocalWork object to an output stream.
static void serializeMapRedWork(MapredWork w, OutputStream out)
          Serialize the mapredWork object to an output stream.
static void serializeQueryPlan(QueryPlan plan, OutputStream out)
          Serialize the whole query plan.
static void serializeTasks(Task<? extends Serializable> t, OutputStream out)
          Serialize a single Task.
static void setColumnNameList(org.apache.hadoop.mapred.JobConf jobConf, Operator op)
           
static void setColumnTypeList(org.apache.hadoop.mapred.JobConf jobConf, Operator op)
           
static void setMapRedWork(org.apache.hadoop.conf.Configuration job, MapredWork w, String hiveScratchDir)
           
static void setWorkflowAdjacencies(org.apache.hadoop.conf.Configuration conf, QueryPlan plan)
           
static double showTime(long time)
           
static boolean supportCombineFileInputFormat()
           
static org.apache.hadoop.fs.Path toTaskTempPath(org.apache.hadoop.fs.Path orig)
           
static org.apache.hadoop.fs.Path toTaskTempPath(String orig)
           
static org.apache.hadoop.fs.Path toTempPath(org.apache.hadoop.fs.Path orig)
           
static org.apache.hadoop.fs.Path toTempPath(String orig)
          Given a path, convert to a temporary path.
static void validateColumnNames(List<String> colNames, List<String> checkCols)
           
static void validatePartSpec(Table tbl, Map<String,String> partSpec)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

HADOOP_LOCAL_FS

public static String HADOOP_LOCAL_FS
The objects in the reducer are composed of these top-level fields.


defaultTd

public static TableDesc defaultTd

carriageReturnCode

public static final int carriageReturnCode
See Also:
Constant Field Values

newLineCode

public static final int newLineCode
See Also:
Constant Field Values

tabCode

public static final int tabCode
See Also:
Constant Field Values

ctrlaCode

public static final int ctrlaCode
See Also:
Constant Field Values

INDENT

public static final String INDENT
See Also:
Constant Field Values

nullStringStorage

public static String nullStringStorage

nullStringOutput

public static String nullStringOutput

randGen

public static Random randGen

NSTR

public static final String NSTR
See Also:
Constant Field Values

getInputSummaryLock

public static Object getInputSummaryLock

suffix

public static String suffix

sqlEscapeChar

public static final char sqlEscapeChar
See Also:
Constant Field Values
Method Detail

clearMapRedWork

public static void clearMapRedWork(org.apache.hadoop.conf.Configuration job)

getMapRedWork

public static MapredWork getMapRedWork(org.apache.hadoop.conf.Configuration job)

setWorkflowAdjacencies

public static void setWorkflowAdjacencies(org.apache.hadoop.conf.Configuration conf,
                                          QueryPlan plan)

getFieldSchemaString

public static List<String> getFieldSchemaString(List<FieldSchema> fl)

setMapRedWork

public static void setMapRedWork(org.apache.hadoop.conf.Configuration job,
                                 MapredWork w,
                                 String hiveScratchDir)

getHiveJobID

public static String getHiveJobID(org.apache.hadoop.conf.Configuration job)

serializeExpression

public static String serializeExpression(ExprNodeDesc expr)

deserializeExpression

public static ExprNodeDesc deserializeExpression(String s,
                                                 org.apache.hadoop.conf.Configuration conf)

serializeTasks

public static void serializeTasks(Task<? extends Serializable> t,
                                  OutputStream out)
Serialize a single Task.


serializeQueryPlan

public static void serializeQueryPlan(QueryPlan plan,
                                      OutputStream out)
Serialize the whole query plan.


deserializeQueryPlan

public static QueryPlan deserializeQueryPlan(InputStream in,
                                             org.apache.hadoop.conf.Configuration conf)
Deserialize the whole query plan.


serializeMapRedWork

public static void serializeMapRedWork(MapredWork w,
                                       OutputStream out)
Serialize the mapredWork object to an output stream. DO NOT use this to write to standard output since it closes the output stream. DO USE mapredWork.toXML() instead.
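
For illustration, a minimal sketch (not from the Hive source) of writing a plan to a scratch file instead of System.out; the MapredWork instance and the scratch path are assumed and purely hypothetical:

    static void writePlan(MapredWork work) throws IOException {
      // Hypothetical scratch-dir path; serializeMapRedWork closes the stream it is given,
      // so a dedicated file stream is used rather than System.out.
      OutputStream planOut = new FileOutputStream("/tmp/hive-scratch/plan.xml");
      Utilities.serializeMapRedWork(work, planOut);
    }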


deserializeMapRedWork

public static MapredWork deserializeMapRedWork(InputStream in,
                                               org.apache.hadoop.conf.Configuration conf)

serializeMapRedLocalWork

public static void serializeMapRedLocalWork(MapredLocalWork w,
                                            OutputStream out)
Serialize the mapredLocalWork object to an output stream. DO NOT use this to write to standard output since it closes the output stream. DO USE mapredWork.toXML() instead.


deserializeMapRedLocalWork

public static MapredLocalWork deserializeMapRedLocalWork(InputStream in,
                                                         org.apache.hadoop.conf.Configuration conf)

getTaskId

public static String getTaskId(org.apache.hadoop.conf.Configuration hconf)
Gets the task id if we are running as a Hadoop job. Gets a random number otherwise.


makeMap

public static HashMap makeMap(Object... olist)

makeProperties

public static Properties makeProperties(String... olist)

makeList

public static ArrayList makeList(Object... olist)

getTableDesc

public static TableDesc getTableDesc(Table tbl)

getTableDesc

public static TableDesc getTableDesc(String cols,
                                     String colTypes)

getPartitionDesc

public static PartitionDesc getPartitionDesc(Partition part)
                                      throws HiveException
Throws:
HiveException

getPartitionDescFromTableDesc

public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc,
                                                          Partition part)
                                                   throws HiveException
Throws:
HiveException

addMapWork

public static void addMapWork(MapredWork mr,
                              Table tbl,
                              String alias,
                              Operator<?> work)

getOpTreeSkel

public static String getOpTreeSkel(Operator<?> op)

contentsEqual

public static boolean contentsEqual(InputStream is1,
                                    InputStream is2,
                                    boolean ignoreWhitespace)
                             throws IOException
Throws:
IOException

abbreviate

public static String abbreviate(String str,
                                int max)
convert "From src insert blah blah" to "From src insert ... blah"


readColumn

public static Utilities.StreamStatus readColumn(DataInput in,
                                                OutputStream out)
                                         throws IOException
Throws:
IOException

createCompressedStream

public static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc,
                                                  OutputStream out)
                                           throws IOException
Convert an output stream to a compressed output stream based on codecs and compression options specified in the Job Configuration.

Parameters:
jc - Job Configuration
out - Output Stream to be converted into compressed output stream
Returns:
compressed output stream
Throws:
IOException

createCompressedStream

public static OutputStream createCompressedStream(org.apache.hadoop.mapred.JobConf jc,
                                                  OutputStream out,
                                                  boolean isCompressed)
                                           throws IOException
Convert an output stream to a compressed output stream based on the codecs specified in the Job Configuration. The caller specifies directly whether the file is compressed or not.

Parameters:
jc - Job Configuration
out - Output Stream to be converted into compressed output stream
isCompressed - whether the output stream needs to be compressed or not
Returns:
compressed output stream
Throws:
IOException
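
A hedged usage sketch of this overload: wrap a raw HDFS output stream according to the job's compression settings. The JobConf, FileSystem, and output path below are assumed to exist and are purely illustrative.

    static void writeCompressed(org.apache.hadoop.mapred.JobConf jc,
                                org.apache.hadoop.fs.FileSystem fs) throws IOException {
      OutputStream raw = fs.create(new org.apache.hadoop.fs.Path("/tmp/hive-out"));
      OutputStream out = Utilities.createCompressedStream(jc, raw, true); // force compression on
      out.write("sample row\n".getBytes());
      out.close(); // finishes the codec stream and closes the underlying file
    }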

getFileExtension

@Deprecated
public static String getFileExtension(org.apache.hadoop.mapred.JobConf jc,
                                                 boolean isCompressed)
Deprecated. Use getFileExtension(JobConf, boolean, HiveOutputFormat)

Based on the compression option and configured output codec, get the extension for the output file. This is only required for text files, not sequencefiles.

Parameters:
jc - Job Configuration
isCompressed - Whether the output file is compressed or not
Returns:
the required file extension (example: .gz)

getFileExtension

public static String getFileExtension(org.apache.hadoop.mapred.JobConf jc,
                                      boolean isCompressed,
                                      HiveOutputFormat<?,?> hiveOutputFormat)
Based on the compression option, output format, and configured output codec, get the extension for the output file. Text files require an extension, whereas others, like sequence files, do not.

The property hive.output.file.extension is used to determine the extension - if set, it will override other logic for choosing an extension.

Parameters:
jc - Job Configuration
isCompressed - Whether the output file is compressed or not
hiveOutputFormat - The output format, used to detect if the format is text
Returns:
the required file extension (example: .gz)
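
A hedged sketch of choosing the extension for a compressed text output file. The use of HiveIgnoreKeyTextOutputFormat here is only an illustrative assumption standing in for whichever text output format the query uses; the returned value depends on the configured codec (and on hive.output.file.extension, if set).

    static String compressedTextExtension(org.apache.hadoop.mapred.JobConf jc) {
      HiveOutputFormat<?, ?> textFormat = new HiveIgnoreKeyTextOutputFormat();
      return Utilities.getFileExtension(jc, true, textFormat); // e.g. ".gz" with the gzip codec
    }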

createSequenceWriter

public static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc,
                                                                            org.apache.hadoop.fs.FileSystem fs,
                                                                            org.apache.hadoop.fs.Path file,
                                                                            Class<?> keyClass,
                                                                            Class<?> valClass)
                                                                     throws IOException
Create a sequencefile output stream based on job configuration.

Parameters:
jc - Job configuration
fs - File System to create file in
file - Path to be created
keyClass - Java Class for key
valClass - Java Class for value
Returns:
output stream over the created sequencefile
Throws:
IOException

createSequenceWriter

public static org.apache.hadoop.io.SequenceFile.Writer createSequenceWriter(org.apache.hadoop.mapred.JobConf jc,
                                                                            org.apache.hadoop.fs.FileSystem fs,
                                                                            org.apache.hadoop.fs.Path file,
                                                                            Class<?> keyClass,
                                                                            Class<?> valClass,
                                                                            boolean isCompressed)
                                                                     throws IOException
Create a sequencefile output stream based on the job configuration. Uses the user-supplied compression flag (rather than obtaining it from the Job Configuration).

Parameters:
jc - Job configuration
fs - File System to create file in
file - Path to be created
keyClass - Java Class for key
valClass - Java Class for value
Returns:
output stream over the created sequencefile
Throws:
IOException
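
A hedged sketch: create a compressed sequencefile and append one record. The key/value classes and the output path are illustrative choices, not requirements of the API.

    static void writeOneRecord(org.apache.hadoop.mapred.JobConf jc,
                               org.apache.hadoop.fs.FileSystem fs) throws IOException {
      org.apache.hadoop.io.SequenceFile.Writer writer = Utilities.createSequenceWriter(
          jc, fs, new org.apache.hadoop.fs.Path("/tmp/hive-out.seq"),
          org.apache.hadoop.io.BytesWritable.class, org.apache.hadoop.io.Text.class, true);
      writer.append(new org.apache.hadoop.io.BytesWritable(new byte[] {1}),
          new org.apache.hadoop.io.Text("value"));
      writer.close();
    }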

createRCFileWriter

public static RCFile.Writer createRCFileWriter(org.apache.hadoop.mapred.JobConf jc,
                                               org.apache.hadoop.fs.FileSystem fs,
                                               org.apache.hadoop.fs.Path file,
                                               boolean isCompressed)
                                        throws IOException
Create an RCFile output stream based on the job configuration. Uses the user-supplied compression flag (rather than obtaining it from the Job Configuration).

Parameters:
jc - Job configuration
fs - File System to create file in
file - Path to be created
Returns:
output stream over the created rcfile
Throws:
IOException

realFile

public static String realFile(String newFile,
                              org.apache.hadoop.conf.Configuration conf)
                       throws IOException
Shamelessly cloned from GenericOptionsParser.

Throws:
IOException

mergeUniqElems

public static List<String> mergeUniqElems(List<String> src,
                                          List<String> dest)

toTaskTempPath

public static org.apache.hadoop.fs.Path toTaskTempPath(org.apache.hadoop.fs.Path orig)

toTaskTempPath

public static org.apache.hadoop.fs.Path toTaskTempPath(String orig)

toTempPath

public static org.apache.hadoop.fs.Path toTempPath(org.apache.hadoop.fs.Path orig)

toTempPath

public static org.apache.hadoop.fs.Path toTempPath(String orig)
Given a path, convert to a temporary path.


isTempPath

public static boolean isTempPath(org.apache.hadoop.fs.FileStatus file)
Detect if the supplied file is a temporary path.


rename

public static void rename(org.apache.hadoop.fs.FileSystem fs,
                          org.apache.hadoop.fs.Path src,
                          org.apache.hadoop.fs.Path dst)
                   throws IOException,
                          HiveException
Rename src to dst, or, if dst already exists, move the files in src into dst. If there is an existing file with the same name, the new file's name will be appended with "_1", "_2", etc.

Parameters:
fs - the FileSystem where src and dst are on.
src - the src directory
dst - the target directory
Throws:
IOException
HiveException

renameOrMoveFiles

public static void renameOrMoveFiles(org.apache.hadoop.fs.FileSystem fs,
                                     org.apache.hadoop.fs.Path src,
                                     org.apache.hadoop.fs.Path dst)
                              throws IOException,
                                     HiveException
Rename src to dst, or, if dst already exists, move the files in src into dst. If there is an existing file with the same name, the new file's name will be appended with "_1", "_2", etc.

Parameters:
fs - the FileSystem where src and dst are on.
src - the src directory
dst - the target directory
Throws:
IOException
HiveException

getTaskIdFromFilename

public static String getTaskIdFromFilename(String filename)
Get the task id from the filename. It is assumed that the filename is derived from the output of getTaskId.

Parameters:
filename - filename to extract taskid from

getPrefixedTaskIdFromFilename

public static String getPrefixedTaskIdFromFilename(String filename)
Get the part-spec + task id from the filename. It is assumed that the filename is derived from the output of getTaskId.

Parameters:
filename - filename to extract taskid from

getFileNameFromDirName

public static String getFileNameFromDirName(String dirName)

replaceTaskIdFromFilename

public static String replaceTaskIdFromFilename(String filename,
                                               int bucketNum)
Replace the task id in the filename. It is assumed that the filename is derived from the output of getTaskId.

Parameters:
filename - the filename in which to replace the task id; for example, "0_0" or "0_0.gz" becomes "33_0" or "33_0.gz" when bucketNum is 33
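
A short sketch using the example values from the parameter description above:

    String bucketed = Utilities.replaceTaskIdFromFilename("0_0.gz", 33); // "33_0.gz"
    String plain = Utilities.replaceTaskIdFromFilename("0_0", 33);       // "33_0"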

replaceTaskIdFromFilename

public static String replaceTaskIdFromFilename(String filename,
                                               String fileId)

getFileStatusRecurse

public static org.apache.hadoop.fs.FileStatus[] getFileStatusRecurse(org.apache.hadoop.fs.Path path,
                                                                     int level,
                                                                     org.apache.hadoop.fs.FileSystem fs)
                                                              throws IOException
Get all file statuses from a root path, recursing down a given number of directory levels.

Parameters:
path - the root path
level - the number of directory levels to explore
fs - the file system
Returns:
array of FileStatus
Throws:
IOException
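
A hedged sketch: list everything two directory levels below a partitioned table's location (one level per partition column); the warehouse path is hypothetical.

    static void listPartitionFiles(org.apache.hadoop.fs.FileSystem fs) throws IOException {
      org.apache.hadoop.fs.FileStatus[] statuses = Utilities.getFileStatusRecurse(
          new org.apache.hadoop.fs.Path("/user/hive/warehouse/sales"), 2, fs);
      for (org.apache.hadoop.fs.FileStatus status : statuses) {
        System.out.println(status.getPath() + "\t" + status.getLen());
      }
    }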

mvFileToFinalPath

public static void mvFileToFinalPath(String specPath,
                                     org.apache.hadoop.conf.Configuration hconf,
                                     boolean success,
                                     org.apache.commons.logging.Log log,
                                     DynamicPartitionCtx dpCtx,
                                     FileSinkDesc conf,
                                     org.apache.hadoop.mapred.Reporter reporter)
                              throws IOException,
                                     HiveException
Throws:
IOException
HiveException

removeTempOrDuplicateFiles

public static void removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs,
                                              org.apache.hadoop.fs.Path path)
                                       throws IOException
Remove all temporary files and duplicate (double-committed) files from a given directory.

Throws:
IOException

removeTempOrDuplicateFiles

public static ArrayList<String> removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileSystem fs,
                                                           org.apache.hadoop.fs.Path path,
                                                           DynamicPartitionCtx dpCtx)
                                                    throws IOException
Remove all temporary files and duplicate (double-committed) files from a given directory.

Returns:
a list of path names for empty buckets that should be created.
Throws:
IOException

removeTempOrDuplicateFiles

public static HashMap<String,org.apache.hadoop.fs.FileStatus> removeTempOrDuplicateFiles(org.apache.hadoop.fs.FileStatus[] items,
                                                                                         org.apache.hadoop.fs.FileSystem fs)
                                                                                  throws IOException
Throws:
IOException

getNameMessage

public static String getNameMessage(Exception e)

getResourceFiles

public static String getResourceFiles(org.apache.hadoop.conf.Configuration conf,
                                      SessionState.ResourceType t)

addToClassPath

public static ClassLoader addToClassPath(ClassLoader cloader,
                                         String[] newPaths)
                                  throws Exception
Add new elements to the classpath.

Parameters:
newPaths - Array of classpath elements
Throws:
Exception

removeFromClassPath

public static void removeFromClassPath(String[] pathsToRemove)
                                throws Exception
Remove elements from the classpath.

Parameters:
pathsToRemove - Array of classpath elements
Throws:
Exception
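
A hedged sketch pairing addToClassPath and removeFromClassPath: temporarily add an auxiliary jar (hypothetical path) to the current thread's context class loader, then remove it again.

    static void withAuxJar() throws Exception {
      String[] jars = {"/tmp/my-udfs.jar"};
      ClassLoader augmented = Utilities.addToClassPath(
          Thread.currentThread().getContextClassLoader(), jars);
      Thread.currentThread().setContextClassLoader(augmented);
      try {
        // ... resolve classes shipped in the jar ...
      } finally {
        Utilities.removeFromClassPath(jars);
      }
    }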

formatBinaryString

public static String formatBinaryString(byte[] array,
                                        int start,
                                        int length)

getColumnNamesFromSortCols

public static List<String> getColumnNamesFromSortCols(List<Order> sortCols)

getColumnNamesFromFieldSchema

public static List<String> getColumnNamesFromFieldSchema(List<FieldSchema> partCols)

getColumnNames

public static List<String> getColumnNames(Properties props)

getColumnTypes

public static List<String> getColumnTypes(Properties props)

validateColumnNames

public static void validateColumnNames(List<String> colNames,
                                       List<String> checkCols)
                                throws SemanticException
Throws:
SemanticException

getDefaultNotificationInterval

public static int getDefaultNotificationInterval(org.apache.hadoop.conf.Configuration hconf)
Gets the default notification interval to send progress updates to the tracker. Useful for operators that may not output data for a while.

Parameters:
hconf -
Returns:
the interval in milliseconds

copyTableJobPropertiesToConf

public static void copyTableJobPropertiesToConf(TableDesc tbl,
                                                org.apache.hadoop.mapred.JobConf job)
Copies the storage handler properties configured for a table descriptor to a runtime job configuration.

Parameters:
tbl - table descriptor from which to read
job - configuration which receives configured properties

getInputSummary

public static org.apache.hadoop.fs.ContentSummary getInputSummary(Context ctx,
                                                                  MapredWork work,
                                                                  org.apache.hadoop.fs.PathFilter filter)
                                                           throws IOException
Calculate the total size of input files.

Parameters:
ctx - the hadoop job context
work - map reduce job plan
filter - filter to apply to the input paths before calculating size
Returns:
the summary of all the input paths.
Throws:
IOException

isEmptyPath

public static boolean isEmptyPath(org.apache.hadoop.mapred.JobConf job,
                                  String dirPath,
                                  Context ctx)
                           throws Exception
Throws:
Exception

isEmptyPath

public static boolean isEmptyPath(org.apache.hadoop.mapred.JobConf job,
                                  org.apache.hadoop.fs.Path dirPath)
                           throws Exception
Throws:
Exception

getMRTasks

public static List<ExecDriver> getMRTasks(List<Task<? extends Serializable>> tasks)

supportCombineFileInputFormat

public static boolean supportCombineFileInputFormat()

getFullDPSpecs

public static List<LinkedHashMap<String,String>> getFullDPSpecs(org.apache.hadoop.conf.Configuration conf,
                                                                DynamicPartitionCtx dpCtx)
                                                         throws HiveException
Construct a list of full partition specs from the Dynamic Partition Context and the directory names corresponding to these dynamic partitions.

Throws:
HiveException

getStatsPublisher

public static StatsPublisher getStatsPublisher(org.apache.hadoop.mapred.JobConf jc)

setColumnNameList

public static void setColumnNameList(org.apache.hadoop.mapred.JobConf jobConf,
                                     Operator op)

setColumnTypeList

public static void setColumnTypeList(org.apache.hadoop.mapred.JobConf jobConf,
                                     Operator op)

validatePartSpec

public static void validatePartSpec(Table tbl,
                                    Map<String,String> partSpec)
                             throws SemanticException
Throws:
SemanticException

generatePath

public static String generatePath(String baseURI,
                                  String dumpFilePrefix,
                                  Byte tag,
                                  String bigBucketFileName)

generateFileName

public static String generateFileName(Byte tag,
                                      String bigBucketFileName)

generateTmpURI

public static String generateTmpURI(String baseURI,
                                    String id)

generateTarURI

public static String generateTarURI(String baseURI,
                                    String filename)

generateTarURI

public static String generateTarURI(org.apache.hadoop.fs.Path baseURI,
                                    String filename)

generateTarFileName

public static String generateTarFileName(String name)

generatePath

public static String generatePath(org.apache.hadoop.fs.Path baseURI,
                                  String filename)

now

public static String now()

showTime

public static double showTime(long time)

checkJDOPushDown

public static String checkJDOPushDown(Table tab,
                                      ExprNodeDesc expr)
Check if the partition pruning expression can be pushed down to JDO filtering. The partition expression contains only partition columns. The criteria for an expression to be pushed down are: 1) the expression contains only functions specified in supportedJDOFuncs(); currently only {=, AND, OR} can be pushed down. 2) the partition column type and the constant type have to be String. This is a restriction of the current JDO filtering implementation.

Parameters:
tab - The table that contains the partition columns.
expr - the partition pruning expression
Returns:
null if the partition pruning expression can be pushed down to JDO filtering.

reworkMapRedWork

public static void reworkMapRedWork(Task<? extends Serializable> task,
                                    boolean reworkMapredWork,
                                    HiveConf conf)
                             throws SemanticException
The check here is not entirely clean. It first uses a for loop to go through all input formats and collects the ones that extend ReworkMapredInputFormat into a set, then goes through that set and calls rework for each one. Technically all of this could be avoided if all of Hive's input formats shared a common interface. In today's Hive and Hadoop that is not possible, because many of Hive's input formats live in Hadoop's code, and most of Hadoop's input formats just extend the InputFormat interface.

Parameters:
task -
reworkMapredWork -
conf -
Throws:
SemanticException

executeWithRetry

public static <T> T executeWithRetry(Utilities.SQLCommand<T> cmd,
                                     PreparedStatement stmt,
                                     int baseWindow,
                                     int maxRetries)
                          throws SQLException
Retry SQL execution with random backoff (same as the one implemented in HDFS-767). This function only retries when the SQL query throws a SQLTransientException (which might be able to succeed with a simple retry). It doesn't retry when the exception is a SQLRecoverableException or SQLNonTransientException. For SQLRecoverableException the caller needs to reconnect to the database and restart the whole transaction.

Parameters:
cmd - the SQL command
stmt - the prepared statement of SQL.
baseWindow - The base time window (in milliseconds) before the next retry. see getRandomWaitTime(int, int, java.util.Random) for details.
maxRetries - the maximum # of retries when getting a SQLTransientException.
Throws:
SQLException - throws SQLRecoverableException or SQLNonTransientException the first time it is caught, or SQLTransientException once maxRetries has been reached.

connectWithRetry

public static Connection connectWithRetry(String connectionString,
                                          int waitWindow,
                                          int maxRetries)
                                   throws SQLException
Retry connecting to a database with random backoff (same as the one implemented in HDFS-767). This function only retries when the SQL query throws a SQLTransientException (which might be able to succeed with a simple retry). It doesn't retry when the exception is a SQLRecoverableException or SQLNonTransientException. For SQLRecoverableException the caller needs to reconnect to the database and restart the whole transaction.

Parameters:
connectionString - the JDBC connection string.
waitWindow - The base time window (in milliseconds) before the next retry. see getRandomWaitTime(int, int, java.util.Random) for details.
maxRetries - the maximum # of retries when getting a SQLTransientException.
Throws:
SQLException - throws SQLRecoverableException or SQLNonTransientException the first time it is caught, or SQLTransientException once maxRetries has been reached.

prepareWithRetry

public static PreparedStatement prepareWithRetry(Connection conn,
                                                 String stmt,
                                                 int waitWindow,
                                                 int maxRetries)
                                          throws SQLException
Retry preparing a SQL statement with random backoff (same as the one implemented in HDFS-767). This function only retries when the SQL query throws a SQLTransientException (which might be able to succeed with a simple retry). It doesn't retry when the exception is a SQLRecoverableException or SQLNonTransientException. For SQLRecoverableException the caller needs to reconnect to the database and restart the whole transaction.

Parameters:
conn - a JDBC connection.
stmt - the SQL statement to be prepared.
waitWindow - The base time window (in milliseconds) before the next retry. see getRandomWaitTime(int, int, java.util.Random) for details.
maxRetries - the maximum # of retries when getting a SQLTransientException.
Throws:
SQLException - throws SQLRecoverableException or SQLNonTransientException the first time it is caught, or SQLTransientException once maxRetries has been reached.
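
A hedged sketch chaining connectWithRetry, prepareWithRetry, and executeWithRetry. It assumes Utilities.SQLCommand<T> exposes a run(PreparedStatement) callback (the nested class is not documented on this page); the connection string, table name, and retry settings are purely illustrative.

    static Long countRowsWithRetry() throws SQLException {
      int waitWindow = 1000; // base backoff window in msec
      int maxRetries = 3;
      Connection conn = Utilities.connectWithRetry(
          "jdbc:derby:;databaseName=stats_db;create=true", waitWindow, maxRetries);
      PreparedStatement ps = Utilities.prepareWithRetry(
          conn, "SELECT COUNT(*) FROM partition_stats", waitWindow, maxRetries);
      Utilities.SQLCommand<Long> countCmd = new Utilities.SQLCommand<Long>() {
        @Override
        public Long run(PreparedStatement stmt) throws SQLException {
          ResultSet rs = stmt.executeQuery();
          return rs.next() ? rs.getLong(1) : 0L;
        }
      };
      return Utilities.executeWithRetry(countCmd, ps, waitWindow, maxRetries);
    }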

getRandomWaitTime

public static long getRandomWaitTime(int baseWindow,
                                     int failures,
                                     Random r)
Introduce a random factor into the wait time before another retry. The wait time depends on the number of failures so far and a random factor. The first time an exception is caught, the wait time is a random number between 0 and baseWindow msec. If the first retry still fails, we wait a baseWindow msec grace period before the second retry, and the waiting window is expanded to 2*baseWindow msec to relieve the request rate on the server. Similarly, the third retry waits a 2*baseWindow msec grace period before retrying, with the waiting window expanded to 3*baseWindow msec, and so on.

Parameters:
baseWindow - the base waiting window.
failures - number of failures so far.
r - a random generator.
Returns:
number of milliseconds for the next wait time.
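
A minimal sketch of the backoff scheme described above (an interpretation of the description, not necessarily the exact Hive implementation): a fixed grace period of failures * baseWindow msec plus a uniformly random component drawn from a window of (failures + 1) * baseWindow msec.

    static long randomWaitTimeSketch(int baseWindow, int failures, Random r) {
      long gracePeriod = (long) baseWindow * failures;                         // 0, base, 2*base, ...
      long randomPart = (long) (r.nextDouble() * baseWindow * (failures + 1)); // expanding window
      return gracePeriod + randomPart;
    }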

escapeSqlLike

public static String escapeSqlLike(String key)
Escape the '_' and '%' characters, as well as the escape character itself, inside the string key.

Parameters:
key - the string that will be used for the SQL LIKE operator.
Returns:
a string with escaped '_' and '%'.
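
A hedged sketch: escape a key before embedding it in a LIKE pattern. The table and column names are hypothetical; sqlEscapeChar is the public escape character declared on this class.

    static String likePrefixQuery(String key) {
      String escaped = Utilities.escapeSqlLike(key); // '_' and '%' in key no longer act as wildcards
      return "SELECT part_name FROM partition_stats WHERE part_name LIKE '" + escaped
          + "%' ESCAPE '" + Utilities.sqlEscapeChar + "'";
    }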

formatMsecToStr

public static String formatMsecToStr(long msec)
Format a number of milliseconds as a string.

Parameters:
msec - milliseconds
Returns:
a formatted string like "x days y hours z minutes a seconds b msec"
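
A hedged sketch: 90061001 msec is 1 day, 1 hour, 1 minute, 1 second and 1 msec, so the result should read along the lines of the format documented above.

    String elapsed = Utilities.formatMsecToStr(90061001L);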

getBuiltinUtilsClass

public static Class getBuiltinUtilsClass()
                                  throws ClassNotFoundException
Throws:
ClassNotFoundException


Copyright © 2011 The Apache Software Foundation