public class ZKWorkerController extends java.lang.Object implements IWorkerController
Modifier and Type | Field and Description |
---|---|
static java.util.logging.Logger |
LOG |
Constructor and Description |
---|
ZKWorkerController(Config config,
java.lang.String jobName,
java.lang.String workerIpAndPort,
int numberOfWorkers,
NodeInfo nodeInfo,
JobAPI.ComputeResource computeResource) |
Modifier and Type | Method and Description |
---|---|
void |
close()
close the children cache
close persistent node for this worker
close the connection
if this is the last worker to complete, delete all relevant znodes for this job
|
java.util.List<WorkerInfo> |
getAllWorkers()
wait to make sure that the number of workers has reached the total number of workers in the job
return all joined workers in the job including the ones that have already left
return null if the timeLimit is reached or an exception is thrown while waiting
|
java.util.List<WorkerInfo> |
getCurrentWorkers()
Get current list of workers from local children cache
This list does not have the workers that have already left
|
java.util.List<WorkerInfo> |
getJoinedWorkers()
Get all joined workers including the ones that have already completed and left
|
int |
getNumberOfCurrentWorkers()
get number of current workers in the job as seen from this worker
|
int |
getNumberOfWorkers()
return the number of all workers in this job,
including non-started ones and finished ones
|
WorkerInfo |
getWorkerInfo()
return the WorkerInfo object for this worker
|
WorkerInfo |
getWorkerInfoForID(int id)
return the WorkerInfo object for the given ID
|
boolean |
initialize()
connect to the server
get a workerID for this worker
append this worker info to the body of job znode
create an ephemeral znode for this client
|
boolean |
initialize(int workerID)
connect to the server
get a workerID for this worker
append this worker info to the body of job znode
create an ephemeral znode for this client
|
void |
printWorkers(java.util.List<WorkerInfo> workers)
Print all given workers
|
void |
waitOnBarrier()
we use a DistributedAtomicInteger to count the number of workers
that have reached the barrier point
The last worker to call this method and increment the DistributedAtomicInteger
removes the barrier and releases all previously waiting workers
The other workers that call this method and increment the DistributedAtomicInteger
enable the barrier by calling the setBarrier method and then wait
it is enough for only the first worker to call the setBarrier method,
however, it does no harm if many workers call it
if we let only the first worker set the barrier with the setBarrier method,
then the second worker may call this method after the DistributedAtomicInteger (dai) is increased
but before the setBarrier method is called.
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getCheckpointingClient, getFailureListener, getRuntimeObject
public ZKWorkerController(Config config, java.lang.String jobName, java.lang.String workerIpAndPort, int numberOfWorkers, NodeInfo nodeInfo, JobAPI.ComputeResource computeResource)
public boolean initialize()
public boolean initialize(int workerID)
public WorkerInfo getWorkerInfo()
IWorkerController
getWorkerInfo
in interface IWorkerController
public WorkerInfo getWorkerInfoForID(int id)
IWorkerController
getWorkerInfoForID
in interface IWorkerController
public int getNumberOfWorkers()
IWorkerController
getNumberOfWorkers
in interface IWorkerController
public void printWorkers(java.util.List<WorkerInfo> workers)
public java.util.List<WorkerInfo> getCurrentWorkers()
public int getNumberOfCurrentWorkers()
public java.util.List<WorkerInfo> getJoinedWorkers()
getJoinedWorkers
in interface IWorkerController
public java.util.List<WorkerInfo> getAllWorkers() throws TimeoutException
getAllWorkers
in interface IWorkerController
TimeoutException
public void waitOnBarrier() throws TimeoutException
waitOnBarrier
in interface IWorkerController
TimeoutException
public void close()