Skip to content

Commit b2178e3

Browse files
rmdmattinglyRay Mattingly
and
Ray Mattingly
authored
HBASE-29202 Balancer conditionals make balancer actions more likely to be approved (#6821) (#6837)
Signed-off-by: Nick Dimiduk <[email protected]> Co-authored-by: Ray Mattingly <[email protected]>
1 parent b649bb9 commit b2178e3

File tree

4 files changed

+137
-14
lines changed

4 files changed

+137
-14
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ int getViolationCountChange(BalancerClusterState cluster, BalanceAction action)
146146
// Reset cluster
147147
cluster.doAction(undoAction);
148148

149-
if (isViolatingPre && isViolatingPost) {
149+
if (isViolatingPre == isViolatingPost) {
150150
return 0;
151151
} else if (!isViolatingPre && isViolatingPost) {
152152
return 1;

hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java

+23-9
Original file line numberDiff line numberDiff line change
@@ -51,16 +51,22 @@ public final class CandidateGeneratorTestUtil {
5151
// Private no-op constructor: the enclosing class is declared final and the members shown in
// this diff are static, so it is not meant to be instantiated.
private CandidateGeneratorTestUtil() {
5252
}
5353

54+
/**
 * Selects the extra end condition that runBalancerToExhaustion checks once its isBalanced flag
 * is true.
 */
enum ExhaustionType {
55+
/** Done only when needsBalance(...) on the stochastic balancer returns false. */
COST_GOAL_ACHIEVED,
56+
/** Done only when the most recent balance run produced null or empty region plans. */
NO_MORE_MOVES;
57+
}
58+
5459
/**
 * Convenience overload that forwards to the six-argument runBalancerToExhaustion with a max
 * running time of 15000 and, after this change, ExhaustionType.COST_GOAL_ACHIEVED.
 * NOTE(review): the running time is written to MAX_RUNNING_TIME_KEY by the callee; presumably
 * milliseconds -- confirm.
 */
static void runBalancerToExhaustion(Configuration conf,
5560
Map<ServerName, List<RegionInfo>> serverToRegions,
5661
Set<Function<BalancerClusterState, Boolean>> expectations, float targetMaxBalancerCost) {
57-
runBalancerToExhaustion(conf, serverToRegions, expectations, targetMaxBalancerCost, 15000);
62+
runBalancerToExhaustion(conf, serverToRegions, expectations, targetMaxBalancerCost, 15000,
63+
ExhaustionType.COST_GOAL_ACHIEVED);
5864
}
5965

6066
static void runBalancerToExhaustion(Configuration conf,
6167
Map<ServerName, List<RegionInfo>> serverToRegions,
6268
Set<Function<BalancerClusterState, Boolean>> expectations, float targetMaxBalancerCost,
63-
long maxRunningTime) {
69+
long maxRunningTime, ExhaustionType exhaustionType) {
6470
// Do the full plan. We're testing with a lot of regions
6571
conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
6672
conf.setLong(MAX_RUNNING_TIME_KEY, maxRunningTime);
@@ -76,7 +82,7 @@ static void runBalancerToExhaustion(Configuration conf,
7682
boolean isBalanced = false;
7783
while (!isBalanced) {
7884
balancerRuns++;
79-
if (balancerRuns > 1000) {
85+
if (balancerRuns > 10) {
8086
throw new RuntimeException("Balancer failed to find balance & meet expectations");
8187
}
8288
long start = System.currentTimeMillis();
@@ -111,16 +117,24 @@ static void runBalancerToExhaustion(Configuration conf,
111117
}
112118
}
113119
if (isBalanced) { // Check if the balancer thinks we're done too
114-
LOG.info("All balancer conditions passed. Checking if balancer thinks it's done.");
115-
if (stochasticLoadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, cluster)) {
116-
LOG.info("Balancer would still like to run");
117-
isBalanced = false;
120+
if (exhaustionType == ExhaustionType.COST_GOAL_ACHIEVED) {
121+
// If we expect to achieve the cost goal, then needsBalance should be false
122+
if (stochasticLoadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, cluster)) {
123+
LOG.info("Balancer cost goal is not achieved. needsBalance=true");
124+
isBalanced = false;
125+
}
118126
} else {
119-
LOG.info("Balancer is done");
127+
// If we anticipate running out of moves, then our last balance run should have produced
128+
// nothing
129+
if (regionPlans != null && !regionPlans.isEmpty()) {
130+
LOG.info("Balancer is not out of moves. regionPlans.size()={}", regionPlans.size());
131+
isBalanced = false;
132+
}
120133
}
121134
}
122135
}
123-
LOG.info("Balancing took {}sec", Duration.ofMillis(balancingMillis).toMinutes());
136+
// Report whole seconds so the logged value matches the "sec" unit in the message
// (the previous toMinutes() call logged minutes under a seconds label).
LOG.info("Balancer is done. Balancing took {}sec",
137+
Duration.ofMillis(balancingMillis).getSeconds());
124138
}
125139

126140
/**

hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,10 @@ public void testTableIsolationAndReplicaDistribution() {
104104
conf.setBoolean(BalancerConditionals.ISOLATE_META_TABLE_KEY, true);
105105
conf.setBoolean(BalancerConditionals.ISOLATE_SYSTEM_TABLES_KEY, true);
106106
DistributeReplicasTestConditional.enableConditionalReplicaDistributionForTest(conf);
107-
108-
runBalancerToExhaustion(conf, serverToRegions, ImmutableSet.of(this::isMetaTableIsolated,
109-
this::isSystemTableIsolated, CandidateGeneratorTestUtil::areAllReplicasDistributed), 10.0f,
110-
60_000);
107+
runBalancerToExhaustion(conf, serverToRegions,
108+
ImmutableSet.of(this::isMetaTableIsolated, this::isSystemTableIsolated,
109+
CandidateGeneratorTestUtil::areAllReplicasDistributed),
110+
10.0f, 60_000, CandidateGeneratorTestUtil.ExhaustionType.COST_GOAL_ACHIEVED);
111111
LOG.info("Meta table regions are successfully isolated, "
112112
+ "and region replicas are appropriately distributed.");
113113
}
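hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestUnattainableBalancerCostGoal.java

+109-0
Original file line numberDiff line numberDiff line change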
@@ -0,0 +1,109 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.master.balancer;
19+
20+
import static org.apache.hadoop.hbase.master.balancer.CandidateGeneratorTestUtil.isTableIsolated;
21+
import static org.apache.hadoop.hbase.master.balancer.CandidateGeneratorTestUtil.runBalancerToExhaustion;
22+
23+
import java.util.ArrayList;
24+
import java.util.HashMap;
25+
import java.util.List;
26+
import java.util.Map;
27+
import org.apache.hadoop.conf.Configuration;
28+
import org.apache.hadoop.hbase.HBaseClassTestRule;
29+
import org.apache.hadoop.hbase.ServerName;
30+
import org.apache.hadoop.hbase.TableName;
31+
import org.apache.hadoop.hbase.client.RegionInfo;
32+
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
33+
import org.apache.hadoop.hbase.testclassification.MasterTests;
34+
import org.apache.hadoop.hbase.testclassification.MediumTests;
35+
import org.junit.BeforeClass;
36+
import org.junit.ClassRule;
37+
import org.junit.Test;
38+
import org.junit.experimental.categories.Category;
39+
import org.slf4j.Logger;
40+
import org.slf4j.LoggerFactory;
41+
42+
import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet;
43+
44+
/**
45+
* If your minCostNeedsBalance is set too low, then the balancer should still eventually stop making
46+
* moves as further cost improvements become impossible, and balancer plan calculation becomes
47+
* wasteful. This test ensures that the balancer will not get stuck in a loop of continuously moving
48+
* regions.
49+
*/
50+
@Category({ MasterTests.class, MediumTests.class })
51+
public class TestUnattainableBalancerCostGoal {
52+
53+
@ClassRule
54+
public static final HBaseClassTestRule CLASS_RULE =
55+
HBaseClassTestRule.forClass(TestUnattainableBalancerCostGoal.class);
56+
57+
private static final Logger LOG = LoggerFactory.getLogger(TestUnattainableBalancerCostGoal.class);
58+
59+
private static final TableName SYSTEM_TABLE_NAME = TableName.valueOf("hbase:system");
60+
private static final TableName NON_SYSTEM_TABLE_NAME = TableName.valueOf("userTable");
61+
62+
private static final int NUM_SERVERS = 10;
63+
private static final int NUM_REGIONS = 1000;
64+
// Passed to runBalancerToExhaustion as targetMaxBalancerCost. Per the class Javadoc it is meant
// to be too low for the balancer to reach.
private static final float UNACHIEVABLE_COST_GOAL = 0.01f;
65+
66+
private static final ServerName[] servers = new ServerName[NUM_SERVERS];
67+
private static final Map<ServerName, List<RegionInfo>> serverToRegions = new HashMap<>();
68+
69+
/**
 * Builds the shared fixture: NUM_SERVERS server names and NUM_REGIONS regions. The first three
 * regions belong to SYSTEM_TABLE_NAME and the rest to NON_SYSTEM_TABLE_NAME. Every region
 * starts on servers[0]; the remaining servers start with empty region lists.
 */
@BeforeClass
70+
public static void setup() {
71+
// Initialize servers
72+
for (int i = 0; i < NUM_SERVERS; i++) {
73+
servers[i] = ServerName.valueOf("server" + i, i, System.currentTimeMillis());
74+
}
75+
76+
// Create regions
77+
List<RegionInfo> allRegions = new ArrayList<>();
78+
for (int i = 0; i < NUM_REGIONS; i++) {
79+
TableName tableName = i < 3 ? SYSTEM_TABLE_NAME : NON_SYSTEM_TABLE_NAME;
80+
// NOTE(review): the narrowing (byte) casts below wrap for i > 127, so single-byte keys
// repeat across the 1000 regions. Confirm that non-unique key ranges are acceptable here.
byte[] startKey = new byte[1];
81+
startKey[0] = (byte) i;
82+
byte[] endKey = new byte[1];
83+
endKey[0] = (byte) (i + 1);
84+
85+
RegionInfo regionInfo =
86+
RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey).setEndKey(endKey).build();
87+
allRegions.add(regionInfo);
88+
}
89+
90+
// Assign all regions to the first server
91+
serverToRegions.put(servers[0], new ArrayList<>(allRegions));
92+
for (int i = 1; i < NUM_SERVERS; i++) {
93+
serverToRegions.put(servers[i], new ArrayList<>());
94+
}
95+
}
96+
97+
/**
 * Enables system-table isolation and runs the balancer with UNACHIEVABLE_COST_GOAL, a max
 * running time of 10_000 and ExhaustionType.NO_MORE_MOVES, with isSystemTableIsolated as the
 * only expectation.
 */
@Test
98+
public void testSystemTableIsolation() {
99+
Configuration conf = new Configuration(false);
100+
conf.setBoolean(BalancerConditionals.ISOLATE_SYSTEM_TABLES_KEY, true);
101+
runBalancerToExhaustion(conf, serverToRegions, ImmutableSet.of(this::isSystemTableIsolated),
102+
UNACHIEVABLE_COST_GOAL, 10_000, CandidateGeneratorTestUtil.ExhaustionType.NO_MORE_MOVES);
103+
// This test isolates the system table, not meta, so the message names the system table.
LOG.info("System table regions are successfully isolated.");
104+
}
105+
106+
/** Expectation callback: delegates to isTableIsolated for SYSTEM_TABLE_NAME, labelled "System". */
private boolean isSystemTableIsolated(BalancerClusterState cluster) {
107+
return isTableIsolated(cluster, SYSTEM_TABLE_NAME, "System");
108+
}
109+
}

0 commit comments

Comments
 (0)