public class WorkerMonitor extends java.lang.Object implements MessageHandler
It gets worker state changes either directly from workers via protocol messages or from the ZooKeeper server.
It handles Job Master to Dashboard communications. It handles Job Master to Driver interactions.
Constructor and Description |
---|
WorkerMonitor(JobMaster jobMaster,
RRServer rrServer,
DashboardClient dashClient,
ZKJobUpdater zkJobUpdater,
JobAPI.Job job,
IDriver driver,
IWorkerFailureListener failureListener) |
Modifier and Type | Method and Description |
---|---|
boolean |
addJoinedWorkers(java.util.List<WorkerWithState> joinedWorkers)
when the job master restarts, it adds already joined workers with this method.
|
boolean |
allWorkersJoined()
if all workers are in one of these states: STARTED, RESTARTED or COMPLETED.
|
boolean |
broadcastMessage(Message message) |
void |
completed(int workerID)
called when a worker COMPLETED the job
|
boolean |
existWorker(int workerID)
return true if there is a worker with the given workerID
|
void |
failed(int workerID)
called when a worker FAILED
|
void |
fullyFailed(int workerID) |
int |
getNumberOfWorkers() |
java.util.List<java.lang.Integer> |
getWorkerIDs()
get the list of workerIDs sorted
|
JobMasterAPI.WorkerInfo |
getWorkerInfoForID(int id) |
java.util.List<JobMasterAPI.WorkerInfo> |
getWorkerInfoList()
get the list of WorkerInfo objects sorted
|
java.util.Collection<WorkerWithState> |
getWorkerList()
get the list of currently registered workers
|
int |
getWorkersListSize()
get the size of workers list
|
JobMasterAPI.WorkerState |
getWorkerState(int workerID) |
void |
informDriverForAllJoined()
inform the driver on restarts if all workers already joined
|
boolean |
isAllJoined() |
void |
onMessage(RequestID id,
int workerId,
Message message)
Handles a specific message type
|
void |
restarted(WorkerWithState workerWithState)
if the worker is coming from failure
|
boolean |
sendMessageToWorkerList(Message message,
java.util.List<java.lang.Integer> workerList)
send a protocol buffer message to a list of workers
|
void |
setWorkerEventSender(IWorkerEventSender workerEventSender) |
void |
started(WorkerWithState workerWithState)
new worker joins for the first time
returns null if the join is successful,
otherwise, it returns an explanation for the failure
|
void |
workersScaledDown(int instancesRemoved) |
void |
workersScaledUp(int instancesAdded) |
public WorkerMonitor(JobMaster jobMaster, RRServer rrServer, DashboardClient dashClient, ZKJobUpdater zkJobUpdater, JobAPI.Job job, IDriver driver, IWorkerFailureListener failureListener)
public void setWorkerEventSender(IWorkerEventSender workerEventSender)
public int getNumberOfWorkers()
public int getWorkersListSize()
public java.util.Collection<WorkerWithState> getWorkerList()
public JobMasterAPI.WorkerInfo getWorkerInfoForID(int id)
public JobMasterAPI.WorkerState getWorkerState(int workerID)
public java.util.List<java.lang.Integer> getWorkerIDs()
public java.util.List<JobMasterAPI.WorkerInfo> getWorkerInfoList()
public boolean existWorker(int workerID)
public boolean isAllJoined()
public void onMessage(RequestID id, int workerId, Message message)
MessageHandler
Specified by: onMessage in interface MessageHandler
id - request id
workerId - worker id
message - protobuf message
public void started(WorkerWithState workerWithState)
public void restarted(WorkerWithState workerWithState)
public void completed(int workerID)
public void failed(int workerID)
public void fullyFailed(int workerID)
public void workersScaledDown(int instancesRemoved)
public void workersScaledUp(int instancesAdded)
public boolean addJoinedWorkers(java.util.List<WorkerWithState> joinedWorkers)
public void informDriverForAllJoined()
public boolean broadcastMessage(Message message)
public boolean sendMessageToWorkerList(Message message, java.util.List<java.lang.Integer> workerList)
public boolean allWorkersJoined()
This is used to send the allWorkersJoined message. We omit started but failed workers. We include started but completed workers.