...
Code Block |
---|
|
/**
* Strategy for deriving blacklisted items from abnormal tasks.
*/
public interface BlacklistStrategy {
/**
* Generates blacklisted items according to the abnormal tasks' locations and the cause.
*
* @param locations the locations of the abnormal tasks.
* @param cause the cause of the blacklisted items.
* @param timestamp the creation timestamp of the blacklisted items.
* NOTE(review): the time unit is not specified here - confirm.
* @return the generated blacklisted items.
*/
Collection<BlacklistedItem<?>> generateBlacklistedItems(
Collection<TaskManagerLocation> locations, Throwable cause, long timestamp);
} |
...
Code Block |
---|
|
/**
* Tracks blacklisted items and answers queries about blacklisted task managers and nodes.
*/
public interface BlacklistTracker {
/**
* Starts the blacklist tracker.
*
* @param mainThreadExecutor the main thread executor handed to the tracker.
* NOTE(review): presumably the tracker runs its actions on this executor - confirm.
*/
void start(ComponentMainThreadExecutor mainThreadExecutor);
/**
* Adds new blacklisted items or updates existing items.
*
* @param items the items to add or update.
* @return the newly added or updated items.
*/
Collection<BlacklistedItem<?>> addNewBlacklistedItems(Collection<BlacklistedItem<?>> items);
/**
* Returns whether the given task manager is blacklisted.
*
* @param resourceID the resource ID of the task manager to check.
* @return {@code true} if the task manager is blacklisted, {@code false} otherwise.
*/
boolean isBlacklistedTaskManager(ResourceID resourceID);
/**
* Gets all blacklisted nodes.
*
* @return the blacklisted nodes, as strings.
* NOTE(review): presumably node IDs (cf. TaskManagerLocation#getNodeId) - confirm.
*/
Set<String> getBlacklistedNodes();
/** Closes the blacklist tracker. */
void close();
}
|
...
Code Block |
---|
|
/**
* Context through which blacklisted items are applied to resources.
*/
public interface BlacklistContext {
/**
* Applies the newly added or updated blacklisted items to resources.
*
* @param newlyAddedOrUpdatedItems the blacklisted items that were newly added or updated.
*/
void blacklistResources(Collection<BlacklistedItem<?>> newlyAddedOrUpdatedItems);
}
|
...
Code Block |
---|
|
/**
* Checker that tells whether a task manager is blacklisted.
*/
public interface BlacklistedTaskManagerChecker {
/**
* Returns whether the given task manager is blacklisted.
*
* @param resourceID the resource ID of the task manager to check.
* @return {@code true} if the task manager is blacklisted, {@code false} otherwise.
*/
boolean isBlacklistedTaskManager(ResourceID resourceID);
}
/**
* Service wrapping a slot pool implementation; only the methods relevant to blacklisting are
* shown here.
*/
public interface SlotPoolService {
/**
* Starts the encapsulated slot pool implementation.
*
* @param jobMasterId the job master ID passed to the slot pool.
* @param address the address passed to the slot pool.
* NOTE(review): presumably the job master's address - confirm.
* @param mainThreadExecutor the main thread executor passed to the slot pool.
* @param blacklistedTaskManagerChecker checker that tells whether a task manager is blacklisted.
* @throws Exception NOTE(review): the failure conditions are not specified here - confirm.
*/
void start(
JobMasterId jobMasterId,
String address,
ComponentMainThreadExecutor mainThreadExecutor,
BlacklistedTaskManagerChecker blacklistedTaskManagerChecker)
throws Exception;
/**
* Releases all slots belonging to the owning TaskExecutor if it has been registered.
*
* @param taskManagerId the ID identifying the TaskExecutor.
* @param cause the cause for failing the slots.
*/
void releaseSlotsOnTaskManager(ResourceID taskManagerId, Exception cause);
/**
* Releases all free slots belonging to the owning TaskExecutor if it has been registered.
* Unlike {@code releaseSlotsOnTaskManager}, this is documented to cover free slots only.
*
* @param taskManagerId the ID identifying the TaskExecutor.
* @param cause the cause for failing the slots.
*/
void releaseFreeSlotsOnTaskManager(ResourceID taskManagerId, Exception cause);
//...
} |
...
Code Block |
---|
|
/**
* Slot manager; only the {@code start} method affected by blacklisting is shown here.
*/
public interface SlotManager {
/**
* Starts the slot manager with the given leader id and resource manager actions.
*
* @param newResourceManagerId the leader id to start the slot manager with.
* @param newMainThreadExecutor the main thread executor passed to the slot manager.
* @param newResourceActions the resource manager actions.
* @param newBlacklistedTaskManagerChecker checker that tells whether a task manager is
* blacklisted.
*/
void start(
ResourceManagerId newResourceManagerId,
Executor newMainThreadExecutor,
ResourceActions newResourceActions,
BlacklistedTaskManagerChecker newBlacklistedTaskManagerChecker);
// ...
} |
ResourceManagerDriver
...
Code Block |
---|
title | ResourceManagerDriver |
---|
|
/**
* Retriever that provides the currently blacklisted nodes.
*/
public interface BlacklistedNodeRetriever {
/**
* Retrieves the blacklisted nodes.
*
* @return the blacklisted nodes, as strings.
* NOTE(review): presumably node IDs (cf. TaskManagerLocation#getNodeId) - confirm.
*/
Set<String> getBlacklistedNodes();
}
/**
* Resource manager driver; only the {@code initialize} method affected by blacklisting is
* shown here.
*
* NOTE(review): {@code WorkerType} is used below but not declared in this excerpt; presumably
* it is a type parameter of this interface - confirm.
*/
public interface ResourceManagerDriver {
/**
* Initializes the deployment specific components.
*
* @param resourceEventHandler the handler for resource events.
* @param mainThreadExecutor the main thread executor passed to the driver.
* @param ioExecutor the IO executor passed to the driver.
* @param blacklistedNodeRetriever retriever that provides the blacklisted nodes.
* @throws Exception NOTE(review): the failure conditions are not specified here - confirm.
*/
void initialize(
ResourceEventHandler<WorkerType> resourceEventHandler,
ScheduledExecutor mainThreadExecutor,
Executor ioExecutor,
BlacklistedNodeRetriever blacklistedNodeRetriever)
throws Exception;
//...
}
|
Synchronize Blacklist between JM & RM
...
Code Block |
---|
|
/**
* Listener that is notified about new blacklisted items.
*/
public interface BlacklistListener {
/**
* Notifies the listener of new blacklisted items.
*
* @param newItems the new blacklisted items.
*/
void notifyNewBlacklistedItems(Collection<BlacklistedItem<?>> newItems);
}
/**
* Job manager gateway; it extends {@link BlacklistListener}, so it can be notified of new
* blacklisted items.
*/
public interface JobManagerGateway extends BlacklistListener {
// ...
}
/**
* Resource manager gateway; it extends {@link BlacklistListener}, so it can be notified of new
* blacklisted items.
*/
public interface ResourceManagerGateway extends BlacklistListener {
// ...
} |
Enrich TaskManagerLocation with node information
...
Code Block |
---|
|
/**
* Task manager location, enriched with node information via {@code getNodeId}.
*/
public class TaskManagerLocation {
/**
* Returns the node ID of this task manager location.
*
* NOTE(review): this is an API sketch - the method is declared without a body inside a
* class, so a concrete implementation must be supplied.
*
* @return the node ID, as a string.
*/
public String getNodeId();
// ...
} |
Metrics
We propose to introduce the following Gauge metrics to ResourceManagerMetricGroup:
...