@InterfaceAudience.Private public class AssignmentManager extends ZooKeeperListener
Monitors ZooKeeper for events related to regions in transition.
Handles existing regions in transition during master failover.
Modifier and Type | Class and Description |
---|---|
static class |
AssignmentManager.ServerHostRegion |
Modifier and Type | Field and Description |
---|---|
protected AtomicBoolean |
failoverCleanupDone
Indicator that AssignmentManager has recovered the region states so
that ServerShutdownHandler can be fully enabled and re-assign regions
of dead servers.
|
static ServerName |
HBCK_CODE_SERVERNAME |
protected Server |
server |
static boolean |
TEST_SKIP_SPLIT_HANDLING
For testing only! Set to true to skip handling of split.
|
watcher
Constructor and Description |
---|
AssignmentManager(Server server,
ServerManager serverManager,
LoadBalancer balancer,
ExecutorService service,
MetricsMaster metricsMaster,
TableLockManager tableLockManager)
Constructs a new assignment manager.
|
Modifier and Type | Method and Description |
---|---|
Lock |
acquireRegionLock(String encodedName)
To avoid racing with AM, external entities may need to lock a region,
for example, when SSH checks what regions to skip re-assigning.
|
void |
addPlan(String encodedName,
RegionPlan plan)
Add a regionPlan for the specified region.
|
void |
addPlans(Map<String,RegionPlan> plans)
Add a map of region plans.
|
void |
assign(HRegionInfo region,
boolean setOfflineInZK)
Assigns the specified region.
|
void |
assign(HRegionInfo region,
boolean setOfflineInZK,
boolean forceNewPlan)
Use care with forceNewPlan.
|
void |
assign(List<HRegionInfo> regions)
Assigns specified regions round robin, if any.
|
void |
assign(Map<HRegionInfo,ServerName> regions)
Assigns specified regions retaining assignments, if any.
|
void |
assignMeta(HRegionInfo hri)
Assigns the hbase:meta region or a replica.
|
void |
balance(RegionPlan plan) |
void |
deleteClosingOrClosedNode(HRegionInfo region,
ServerName sn) |
LoadBalancer |
getBalancer() |
int |
getNumRegionsOpened()
Used by unit tests.
|
RegionPlan |
getRegionReopenPlan(HRegionInfo hri) |
RegionStates |
getRegionStates()
This SHOULD not be public.
|
Pair<Integer,Integer> |
getReopenStatus(TableName tableName)
Used by the client to identify if all regions have the schema updates
|
Set<HRegionInfo> |
getReplicasToClose() |
Map<ServerName,List<HRegionInfo>> |
getSnapShotOfAssignment(Collection<HRegionInfo> infos) |
TableStateManager |
getTableStateManager() |
AssignmentManager.ServerHostRegion |
isCarryingMeta(ServerName serverName) |
AssignmentManager.ServerHostRegion |
isCarryingMetaReplica(ServerName serverName,
HRegionInfo metaHri) |
AssignmentManager.ServerHostRegion |
isCarryingMetaReplica(ServerName serverName,
int replicaId) |
boolean |
isFailoverCleanupDone()
Used by ServerShutdownHandler to make sure AssignmentManager has completed
the failover cleanup before re-assigning regions of dead servers.
|
void |
nodeChildrenChanged(String path)
New unassigned node has been created.
|
void |
nodeCreated(String path)
New unassigned node has been created.
|
void |
nodeDataChanged(String path)
Existing unassigned node has had data changed.
|
void |
nodeDeleted(String path)
Called when a node has been deleted
|
void |
offlineDisabledRegion(HRegionInfo regionInfo) |
protected String |
onRegionTransition(ServerName serverName,
RegionServerStatusProtos.RegionStateTransition transition)
Try to update some region states.
|
List<HRegionInfo> |
processServerShutdown(ServerName sn)
Process shutdown server removing any assignments.
|
void |
regionOffline(HRegionInfo regionInfo)
Marks the region as offline.
|
void |
registerListener(AssignmentListener listener)
Add the listener to the notification list.
|
void |
removeClosedRegion(HRegionInfo hri)
When a region is closed, it should be removed from the regionsToReopen
|
static List<HRegionInfo> |
replicaRegionsNotRecordedInMeta(Set<HRegionInfo> regionsRecordedInMeta,
MasterServices master)
Get a list of replica regions that are:
not recorded in meta yet.
|
protected void |
setEnabledTable(TableName tableName) |
void |
setRegionsToReopen(List<HRegionInfo> regions)
Set the list of regions that will be reopened
because of an update in table schema
|
void |
shutdown()
Shutdown the threadpool executor service
|
void |
stop() |
void |
unassign(HRegionInfo region)
Unassigns the specified region.
|
void |
unassign(HRegionInfo region,
boolean force) |
void |
unassign(HRegionInfo region,
boolean force,
ServerName dest)
Unassigns the specified region.
|
boolean |
unregisterListener(AssignmentListener listener)
Remove the listener from the notification list.
|
void |
updateRegionsInTransitionMetrics()
Set Regions in transitions metrics.
|
protected boolean |
waitForAssignment(Collection<HRegionInfo> regionSet,
boolean waitTillAllAssigned,
int reassigningRegions,
long minEndTime)
Waits until the specified region has completed assignment, or the deadline is reached.
|
protected boolean |
waitForAssignment(Collection<HRegionInfo> regionSet,
boolean waitTillAllAssigned,
long deadline)
Waits until the specified region has completed assignment, or the deadline is reached.
|
boolean |
waitForAssignment(HRegionInfo regionInfo)
Waits until the specified region has completed assignment.
|
void |
waitOnRegionToClearRegionsInTransition(HRegionInfo hri)
Wait on region to clear regions-in-transition.
|
boolean |
waitOnRegionToClearRegionsInTransition(HRegionInfo hri,
long timeOut)
Wait on region to clear regions-in-transition or time out
|
protected boolean |
wasRegionOnDeadServerByMeta(HRegionInfo region,
ServerName sn) |
protected void |
zkEventWorkersSubmit(org.apache.hadoop.hbase.master.AssignmentManager.RegionRunnable regRunnable)
Submit a task, ensuring that there is only one task at a time working on a given region.
|
getWatcher
public static final ServerName HBCK_CODE_SERVERNAME
protected final Server server
protected final AtomicBoolean failoverCleanupDone
public static boolean TEST_SKIP_SPLIT_HANDLING
public AssignmentManager(Server server, ServerManager serverManager, LoadBalancer balancer, ExecutorService service, MetricsMaster metricsMaster, TableLockManager tableLockManager) throws KeeperException, IOException, CoordinatedStateException
server
- instance of HMaster this AM is running inside
serverManager
- serverManager for associated HMaster
balancer
- implementation of LoadBalancer
service
- Executor service
metricsMaster
- metrics manager
tableLockManager
- TableLock manager
KeeperException
IOException
CoordinatedStateException
public void registerListener(AssignmentListener listener)
listener
- The AssignmentListener to register
public boolean unregisterListener(AssignmentListener listener)
listener
- The AssignmentListener to unregister
public TableStateManager getTableStateManager()
public RegionStates getRegionStates()
public RegionPlan getRegionReopenPlan(HRegionInfo hri)
public void addPlan(String encodedName, RegionPlan plan)
encodedName
- encoded name of the region the plan is for
plan
- the region plan to add
public void addPlans(Map<String,RegionPlan> plans)
public void setRegionsToReopen(List<HRegionInfo> regions)
regions
- list of regions that should be tracked for reopen
public Pair<Integer,Integer> getReopenStatus(TableName tableName) throws IOException
tableName
- name of the table whose regions are checked for schema updates
IOException
public boolean isFailoverCleanupDone()
public Lock acquireRegionLock(String encodedName)
public void removeClosedRegion(HRegionInfo hri)
hri
- HRegionInfo of the region which was closed
public void nodeCreated(String path)
This happens when an RS begins the OPENING or CLOSING of a region by creating an unassigned node.
When this happens we must:
nodeCreated
in class ZooKeeperListener
path
- full path of the new node
public void nodeDataChanged(String path)
This happens when an RS transitions from OFFLINE to OPENING, or between OPENING/OPENED and CLOSING/CLOSED.
When this happens we must:
nodeDataChanged
in class ZooKeeperListener
path
- full path of the updated node
protected void zkEventWorkersSubmit(org.apache.hadoop.hbase.master.AssignmentManager.RegionRunnable regRunnable)
public void nodeDeleted(String path)
ZooKeeperListener
nodeDeleted
in class ZooKeeperListener
path
- full path of the deleted node
public void nodeChildrenChanged(String path)
This happens when an RS begins the OPENING, SPLITTING or CLOSING of a region by creating a znode.
When this happens we must:
nodeChildrenChanged
in class ZooKeeperListener
path
- full path of the node whose children have changed
public void regionOffline(HRegionInfo regionInfo)
Used when a region has been closed and should remain closed.
regionInfo
- the region to mark as offline
public void offlineDisabledRegion(HRegionInfo regionInfo)
public void assign(HRegionInfo region, boolean setOfflineInZK)
If a RegionPlan is available with a valid destination then it will be used to determine what server region is assigned to. If no RegionPlan is available, region will be assigned to a random available server.
Updates the RegionState and sends the OPEN RPC.
This will only succeed if the region is in transition and in a CLOSED or OFFLINE state or not in transition (in-memory not zk), and of course, the chosen server is up and running (It may have just crashed!). If the in-memory checks pass, the zk node is forced to OFFLINE before assigning.
region
- region to be assigned
setOfflineInZK
- whether ZK node should be created/transitioned to an
OFFLINE state before assigning the region
public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan)
protected boolean wasRegionOnDeadServerByMeta(HRegionInfo region, ServerName sn)
public void unassign(HRegionInfo region)
Updates the RegionState and sends the CLOSE RPC unless region is being split by regionserver; then the unassign fails (silently) because we presume the region being unassigned no longer exists (it's been split out of existence). TODO: What to do if split fails and is rolled back and parent is revivified?
If a RegionPlan is already set, it will remain.
region
- region to be unassigned
public void unassign(HRegionInfo region, boolean force, ServerName dest)
Updates the RegionState and sends the CLOSE RPC unless region is being split by regionserver; then the unassign fails (silently) because we presume the region being unassigned no longer exists (it's been split out of existence). TODO: What to do if split fails and is rolled back and parent is revivified?
If a RegionPlan is already set, it will remain.
region
- region to be unassigned
force
- if region should be closed even if already closing
public void unassign(HRegionInfo region, boolean force)
public void deleteClosingOrClosedNode(HRegionInfo region, ServerName sn)
region
- regioninfo of znode to be deleted.
public int getNumRegionsOpened()
public boolean waitForAssignment(HRegionInfo regionInfo) throws InterruptedException
If the region is already assigned, returns immediately. Otherwise, method blocks until the region is assigned.
regionInfo
- region to wait on assignment for
InterruptedException
protected boolean waitForAssignment(Collection<HRegionInfo> regionSet, boolean waitTillAllAssigned, int reassigningRegions, long minEndTime) throws InterruptedException
InterruptedException
protected boolean waitForAssignment(Collection<HRegionInfo> regionSet, boolean waitTillAllAssigned, long deadline) throws InterruptedException
regionSet
- set of regions to wait on; the set is modified and the assigned regions are removed
waitTillAllAssigned
- true if we should wait for all the regions to be assigned
deadline
- the timestamp after which the wait is aborted
InterruptedException
public void assignMeta(HRegionInfo hri) throws KeeperException
Assumes that hbase:meta is currently closed and is not being actively served by any RegionServer.
Forcibly unsets the current meta region location in ZooKeeper and assigns hbase:meta to a random RegionServer.
hri
- the hbase:meta region (or replica) to assign
KeeperException
public void assign(Map<HRegionInfo,ServerName> regions) throws IOException, InterruptedException
This is a synchronous call and will return once every region has been assigned. If anything fails, an exception is thrown
InterruptedException
IOException
public void assign(List<HRegionInfo> regions) throws IOException, InterruptedException
This is a synchronous call and will return once every region has been assigned. If anything fails, an exception is thrown
InterruptedException
IOException
public static List<HRegionInfo> replicaRegionsNotRecordedInMeta(Set<HRegionInfo> regionsRecordedInMeta, MasterServices master) throws IOException
regionsRecordedInMeta
- the list of regions we know are recorded in meta
either as a default, or, as the location of a replica
master
- the MasterServices instance
IOException
public void updateRegionsInTransitionMetrics()
public void waitOnRegionToClearRegionsInTransition(HRegionInfo hri) throws IOException, InterruptedException
hri
- Region to wait on.
IOException
InterruptedException
public boolean waitOnRegionToClearRegionsInTransition(HRegionInfo hri, long timeOut) throws InterruptedException
hri
- Region to wait on.
timeOut
- Milliseconds to wait for current region to be out of transition state.
InterruptedException
public AssignmentManager.ServerHostRegion isCarryingMeta(ServerName serverName)
public AssignmentManager.ServerHostRegion isCarryingMetaReplica(ServerName serverName, int replicaId)
public AssignmentManager.ServerHostRegion isCarryingMetaReplica(ServerName serverName, HRegionInfo metaHri)
public List<HRegionInfo> processServerShutdown(ServerName sn)
sn
- Server that went down.
public void balance(RegionPlan plan)
plan
- Plan to execute.
public void stop()
public void shutdown()
protected void setEnabledTable(TableName tableName)
public Set<HRegionInfo> getReplicasToClose()
protected String onRegionTransition(ServerName serverName, RegionServerStatusProtos.RegionStateTransition transition)
public LoadBalancer getBalancer()
public Map<ServerName,List<HRegionInfo>> getSnapShotOfAssignment(Collection<HRegionInfo> infos)