org.apache.hadoop.hive.ql.exec
Class Task<T extends Serializable>

java.lang.Object
  extended by org.apache.hadoop.hive.ql.exec.Task<T>
All Implemented Interfaces:
Serializable, Node
Direct Known Subclasses:
BlockMergeTask, ColumnStatsTask, ConditionalTask, CopyTask, DDLTask, DependencyCollectionTask, ExecDriver, ExplainTask, FetchTask, FunctionTask, IndexMetadataChangeTask, MapredLocalTask, MoveTask, StatsTask

public abstract class Task<T extends Serializable>
extends Object
implements Serializable, Node

Task implementation.

See Also:
Serialized Form

Nested Class Summary
static class Task.FeedType
           
 
Field Summary
static int BACKUP_COMMON_JOIN
           
protected  List<Task<? extends Serializable>> backupChildrenTasks
           
protected  Task<? extends Serializable> backupTask
           
protected  List<Task<? extends Serializable>> childTasks
           
protected  boolean clonedConf
           
static int COMMON_JOIN
           
protected  HiveConf conf
           
protected  SessionState.LogHelper console
           
static int CONVERTED_LOCAL_MAPJOIN
           
static int CONVERTED_MAPJOIN
           
protected  Hive db
           
protected  DriverContext driverContext
           
protected  List<Task<? extends Serializable>> feedSubscribers
           
protected  String id
           
protected  boolean initialized
           
protected  boolean isdone
           
protected  String jobID
           
static int LOCAL_MAPJOIN
           
protected static org.apache.commons.logging.Log LOG
           
static int NO_TAG
           
protected  List<Task<? extends Serializable>> parentTasks
           
protected  QueryPlan queryPlan
           
protected  boolean queued
           
protected  boolean started
           
protected  HashMap<String,Long> taskCounters
           
protected  TaskHandle taskHandle
           
protected  int taskTag
           
protected  T work
           
 
Constructor Summary
Task()
           
 
Method Summary
 boolean addDependentTask(Task<? extends Serializable> dependent)
          Add a dependent task on the current task.
protected  void cloneConf()
           
 boolean done()
           
protected abstract  int execute(DriverContext driverContext)
          This method is overridden in each Task.
 int executeTask()
          This method is called in the Driver on every task.
 boolean fetch(ArrayList<String> res)
           
 Task<? extends Serializable> getAndInitBackupTask()
           
 List<Task<? extends Serializable>> getBackupChildrenTasks()
           
 Task<? extends Serializable> getBackupTask()
           
 List<? extends Node> getChildren()
          Gets the vector of children nodes.
 List<Task<? extends Serializable>> getChildTasks()
           
 HashMap<String,Long> getCounters()
           
 List<Task<? extends Serializable>> getDependentTasks()
          The default dependent tasks are just child tasks, but different types could implement their own (e.g. ConditionalTask will use the listTasks as dependents).
 List<Task<? extends Serializable>> getFeedSubscribers()
           
 String getId()
           
 boolean getInitialized()
           
 String getJobID()
           
 List<Task<? extends Serializable>> getParentTasks()
           
 QueryPlan getQueryPlan()
           
 boolean getQueued()
           
 Operator<? extends OperatorDesc> getReducer()
           
 List<FieldSchema> getResultSchema()
           
 int getTaskTag()
           
 Collection<Operator<? extends OperatorDesc>> getTopOperators()
           
abstract  StageType getType()
          Should be overridden to return the type of the specific task among the types in StageType.
 T getWork()
           
 boolean hasReduce()
           
 boolean ifRetryCmdWhenFail()
           
 void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext)
           
 boolean isLocalMode()
           
 boolean isMapRedLocalTask()
           
 boolean isMapRedTask()
           
 boolean isRunnable()
           
 void localizeMRTmpFiles(Context ctx)
          Localize a task tree.
protected abstract  void localizeMRTmpFilesImpl(Context ctx)
          If this task uses any map-reduce intermediate data (either for reading or for writing), localize them (using the supplied Context).
protected  void pushFeed(Task.FeedType feedType, Object feedValue)
           
protected  void receiveFeed(Task.FeedType feedType, Object feedValue)
           
 void removeDependentTask(Task<? extends Serializable> dependent)
          Remove the dependent task.
 void removeFromChildrenTasks()
           
 boolean requireLock()
           
 void setBackupChildrenTasks(List<Task<? extends Serializable>> backupChildrenTasks)
           
 void setBackupTask(Task<? extends Serializable> backupTask)
           
 void setChildTasks(List<Task<? extends Serializable>> childTasks)
           
 void setDone()
           
 void setFeedSubscribers(List<Task<? extends Serializable>> s)
           
 void setId(String id)
           
 void setInitialized()
           
 void setLocalMode(boolean isLocalMode)
           
 void setParentTasks(List<Task<? extends Serializable>> parentTasks)
           
 void setQueryPlan(QueryPlan queryPlan)
           
 void setQueued()
           
 void setRetryCmdWhenFail(boolean retryCmdWhenFail)
           
 void setStarted()
           
 void setTaskTag(int taskTag)
           
 void setWork(T work)
           
 boolean started()
           
 void subscribeFeed(Task<? extends Serializable> publisher)
          Subscribe to the feed of publisher.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface org.apache.hadoop.hive.ql.lib.Node
getName
 

Field Detail

started

protected transient boolean started

initialized

protected transient boolean initialized

isdone

protected transient boolean isdone

queued

protected transient boolean queued

conf

protected transient HiveConf conf

db

protected transient Hive db

console

protected transient SessionState.LogHelper console

queryPlan

protected transient QueryPlan queryPlan

taskHandle

protected transient TaskHandle taskHandle

taskCounters

protected transient HashMap<String,Long> taskCounters

driverContext

protected transient DriverContext driverContext

clonedConf

protected transient boolean clonedConf

jobID

protected transient String jobID

backupTask

protected Task<? extends Serializable> backupTask

backupChildrenTasks

protected List<Task<? extends Serializable>> backupChildrenTasks

LOG

protected static transient org.apache.commons.logging.Log LOG

taskTag

protected int taskTag

NO_TAG

public static final int NO_TAG
See Also:
Constant Field Values

COMMON_JOIN

public static final int COMMON_JOIN
See Also:
Constant Field Values

CONVERTED_MAPJOIN

public static final int CONVERTED_MAPJOIN
See Also:
Constant Field Values

CONVERTED_LOCAL_MAPJOIN

public static final int CONVERTED_LOCAL_MAPJOIN
See Also:
Constant Field Values

BACKUP_COMMON_JOIN

public static final int BACKUP_COMMON_JOIN
See Also:
Constant Field Values

LOCAL_MAPJOIN

public static final int LOCAL_MAPJOIN
See Also:
Constant Field Values

feedSubscribers

protected transient List<Task<? extends Serializable>> feedSubscribers

id

protected String id

work

protected T work

childTasks

protected List<Task<? extends Serializable>> childTasks

parentTasks

protected List<Task<? extends Serializable>> parentTasks
Constructor Detail

Task

public Task()
Method Detail

initialize

public void initialize(HiveConf conf,
                       QueryPlan queryPlan,
                       DriverContext driverContext)

executeTask

public int executeTask()
This method is called in the Driver on every task. It updates counters and calls execute(DriverContext), which is overridden in each task.

Returns:
return value of execute()

execute

protected abstract int execute(DriverContext driverContext)
This method is overridden in each Task. TODO: execute should return a TaskHandle.

Returns:
status of executing the task

fetch

public boolean fetch(ArrayList<String> res)
              throws IOException,
                     CommandNeedRetryException
Throws:
IOException
CommandNeedRetryException

setChildTasks

public void setChildTasks(List<Task<? extends Serializable>> childTasks)

getChildren

public List<? extends Node> getChildren()
Description copied from interface: Node
Gets the vector of children nodes. This is used in the graph walker algorithms.

Specified by:
getChildren in interface Node
Returns:
List

getChildTasks

public List<Task<? extends Serializable>> getChildTasks()

setParentTasks

public void setParentTasks(List<Task<? extends Serializable>> parentTasks)

getParentTasks

public List<Task<? extends Serializable>> getParentTasks()

getBackupTask

public Task<? extends Serializable> getBackupTask()

setBackupTask

public void setBackupTask(Task<? extends Serializable> backupTask)

getBackupChildrenTasks

public List<Task<? extends Serializable>> getBackupChildrenTasks()

setBackupChildrenTasks

public void setBackupChildrenTasks(List<Task<? extends Serializable>> backupChildrenTasks)

getAndInitBackupTask

public Task<? extends Serializable> getAndInitBackupTask()

removeFromChildrenTasks

public void removeFromChildrenTasks()

getDependentTasks

public List<Task<? extends Serializable>> getDependentTasks()
The default dependent tasks are just child tasks, but different types could implement their own (e.g. ConditionalTask will use the listTasks as dependents).

Returns:
a list of tasks that are dependent on this task.

addDependentTask

public boolean addDependentTask(Task<? extends Serializable> dependent)
Add a dependent task on the current task. The return value indicates whether the dependency is new or already existed.

Returns:
true if the task was added, false if it already existed

removeDependentTask

public void removeDependentTask(Task<? extends Serializable> dependent)
Remove the dependent task.

Parameters:
dependent - the task to remove

setStarted

public void setStarted()

started

public boolean started()

done

public boolean done()

setDone

public void setDone()

setQueued

public void setQueued()

getQueued

public boolean getQueued()

setInitialized

public void setInitialized()

getInitialized

public boolean getInitialized()

isRunnable

public boolean isRunnable()

setWork

public void setWork(T work)

getWork

public T getWork()

setId

public void setId(String id)

getId

public String getId()

isMapRedTask

public boolean isMapRedTask()

isMapRedLocalTask

public boolean isMapRedLocalTask()

getTopOperators

public Collection<Operator<? extends OperatorDesc>> getTopOperators()

hasReduce

public boolean hasReduce()

getReducer

public Operator<? extends OperatorDesc> getReducer()

getCounters

public HashMap<String,Long> getCounters()

getType

public abstract StageType getType()
Should be overridden to return the type of the specific task among the types in StageType.

Returns:
StageType.* or null if not overridden

localizeMRTmpFilesImpl

protected abstract void localizeMRTmpFilesImpl(Context ctx)
If this task uses any map-reduce intermediate data (either for reading or for writing), localize them (using the supplied Context). Map-Reduce intermediate directories are allocated using Context.getMRTmpFileURI() and can be localized using localizeMRTmpFileURI(). This method is declared abstract to force any task code to explicitly deal with this aspect of execution.

Parameters:
ctx - context object with which to localize

localizeMRTmpFiles

public final void localizeMRTmpFiles(Context ctx)
Localize a task tree.

Parameters:
ctx - context object with which to localize

subscribeFeed

public void subscribeFeed(Task<? extends Serializable> publisher)
Subscribe to the feed of publisher. To prevent cycles, a task can only subscribe to its ancestor. A feed is a generic form of execution-time feedback: a (type, value) pair passed from one task to another. Examples include dynamic partitions (which are only available at execution time). The MoveTask may pass the list of dynamic partitions to the StatsTask, since after the MoveTask the list of dynamic partitions is lost (MoveTask moves them to the table's destination directory, where they are mixed with old partitions).

Parameters:
publisher - the feed provider.

getFeedSubscribers

public List<Task<? extends Serializable>> getFeedSubscribers()

setFeedSubscribers

public void setFeedSubscribers(List<Task<? extends Serializable>> s)

pushFeed

protected void pushFeed(Task.FeedType feedType,
                        Object feedValue)

receiveFeed

protected void receiveFeed(Task.FeedType feedType,
                           Object feedValue)

cloneConf

protected void cloneConf()

getTaskTag

public int getTaskTag()

setTaskTag

public void setTaskTag(int taskTag)

isLocalMode

public boolean isLocalMode()

setLocalMode

public void setLocalMode(boolean isLocalMode)

requireLock

public boolean requireLock()

ifRetryCmdWhenFail

public boolean ifRetryCmdWhenFail()

setRetryCmdWhenFail

public void setRetryCmdWhenFail(boolean retryCmdWhenFail)

getQueryPlan

public QueryPlan getQueryPlan()

setQueryPlan

public void setQueryPlan(QueryPlan queryPlan)

getJobID

public String getJobID()

getResultSchema

public List<FieldSchema> getResultSchema()


Copyright © 2013 The Apache Software Foundation