Skip to content

Commit e4b5c0c

Browse files
committed
[enhance](cloud) Add a policy to delay switching a tablet to a new BE when its current BE is abnormal
1 parent b2ebfe9 commit e4b5c0c

File tree

3 files changed

+46
-6
lines changed

3 files changed

+46
-6
lines changed

fe/fe-common/src/main/java/org/apache/doris/common/Config.java

+6
Original file line numberDiff line numberDiff line change
@@ -3030,6 +3030,12 @@ public static int metaServiceRpcRetryTimes() {
30303030
@ConfField(mutable = true, description = {"存算分离模式下是否开启大事务提交,默认false"})
30313031
public static boolean enable_cloud_txn_lazy_commit = false;
30323032

3033+
@ConfField(mutable = true, description = {"存算分离模式下,当tablet分布的be异常,是否立即映射tablet到新的be上,默认true"})
3034+
public static boolean enable_immediate_be_assign = true;
3035+
3036+
@ConfField(mutable = true, description = {"存算分离模式下,当tablet分布的be异常,临时映射(secondary) be的有效期,默认60s"})
3037+
public static int secondary_be_validity_seconds = 60 ;
3038+
30333039
// ATTN: DO NOT add any config not related to cloud mode here
30343040
// ATTN: DO NOT add any config not related to cloud mode here
30353041
// ATTN: DO NOT add any config not related to cloud mode here

fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudReplica.java

+39-5
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import org.apache.doris.catalog.Env;
2121
import org.apache.doris.catalog.Partition;
2222
import org.apache.doris.catalog.Replica;
23-
import org.apache.doris.catalog.Replica.ReplicaContext;
2423
import org.apache.doris.cloud.system.CloudSystemInfoService;
2524
import org.apache.doris.common.Config;
2625
import org.apache.doris.common.DdlException;
@@ -60,11 +59,16 @@ public class CloudReplica extends Replica {
6059
private long indexId = -1;
6160
@SerializedName(value = "idx")
6261
private long idx = -1;
62+
// no need to serialize
63+
private long secondaryBe = -1;
6364

6465
private Random rand = new Random();
6566

6667
private Map<String, List<Long>> memClusterToBackends = new ConcurrentHashMap<String, List<Long>>();
6768

69+
// bad BE host -> timestamp (ms) at which that BE was first observed as bad
70+
private Map<String, Long> badBeAndBadTime = new ConcurrentHashMap<>();
71+
6872
public CloudReplica() {
6973
}
7074

@@ -216,20 +220,44 @@ private long getBackendIdImpl(String cluster) {
216220
long backendId = clusterToBackends.get(clusterId).get(0);
217221
Backend be = Env.getCurrentSystemInfo().getBackend(backendId);
218222
if (be != null && be.isQueryAvailable()) {
223+
// BE is normal (exists and is query-available)
219224
if (LOG.isDebugEnabled()) {
220225
LOG.debug("backendId={} ", backendId);
221226
}
227+
badBeAndBadTime.remove(be.getHost());
228+
secondaryBe = -1;
222229
return backendId;
223230
}
231+
// BE is abnormal (missing or not query-available)
232+
if (Config.enable_immediate_be_assign) {
233+
// rehash immediately
234+
return hashReplicaToBe(clusterId, false, true);
235+
}
236+
// BE is abnormal, but route to a temporary (secondary) BE instead of remapping the tablet right away
237+
if (be != null) {
238+
if (!badBeAndBadTime.containsKey(be.getHost())) {
239+
badBeAndBadTime.put(be.getHost(), System.currentTimeMillis());
240+
}
241+
if (secondaryBe == -1) {
242+
secondaryBe = hashReplicaToBe(clusterId, false, false);
243+
}
244+
if (System.currentTimeMillis() - badBeAndBadTime.get(be.getHost())
245+
> Config.secondary_be_validity_seconds * 1000L) {
246+
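// once the BE has been bad for longer than secondary_be_validity_seconds, make the secondary BE the tablet's BE
// NOTE(review): secondaryBe is reset to -1 right after this and then returned, so this call returns -1
// rather than the BE that was just assigned — confirm this is intended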
247+
setBeToTablet(clusterId, secondaryBe);
248+
secondaryBe = -1;
249+
}
250+
return secondaryBe;
251+
}
224252
}
225253
if (DebugPointUtil.isEnable("CloudReplica.getBackendIdImpl.clusterToBackends")) {
226254
LOG.info("Debug Point enable CloudReplica.getBackendIdImpl.clusterToBackends");
227255
return -1;
228256
}
229-
return hashReplicaToBe(clusterId, false);
257+
return hashReplicaToBe(clusterId, false, true);
230258
}
231259

232-
public long hashReplicaToBe(String clusterId, boolean isBackGround) {
260+
public long hashReplicaToBe(String clusterId, boolean isBackGround, boolean setToTablet) {
233261
// TODO(luwei) list should be sorted
234262
List<Backend> clusterBes = ((CloudSystemInfoService) Env.getCurrentSystemInfo())
235263
.getBackendsByClusterId(clusterId);
@@ -270,12 +298,18 @@ public long hashReplicaToBe(String clusterId, boolean isBackGround) {
270298
pickedBeId, getId(), partitionId, availableBes.size(), idx, index,
271299
hashCode == null ? -1 : hashCode.asLong());
272300

301+
if (setToTablet) {
302+
setBeToTablet(clusterId, pickedBeId);
303+
}
304+
305+
return pickedBeId;
306+
}
307+
308+
private void setBeToTablet(String clusterId, long pickedBeId) {
273309
// save to clusterToBackends map
274310
List<Long> bes = new ArrayList<Long>();
275311
bes.add(pickedBeId);
276312
clusterToBackends.put(clusterId, bes);
277-
278-
return pickedBeId;
279313
}
280314

281315
public List<Long> hashReplicaToBes(String clusterId, boolean isBackGround, int replicaNum) {

fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudTabletRebalancer.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ private void completeRouteInfo() {
502502
Map<String, List<Long>> clusterToBackends =
503503
((CloudReplica) replica).getClusterToBackends();
504504
if (!clusterToBackends.containsKey(cluster)) {
505-
long beId = ((CloudReplica) replica).hashReplicaToBe(cluster, true);
505+
long beId = ((CloudReplica) replica).hashReplicaToBe(cluster, true, true);
506506
if (beId <= 0) {
507507
assignedErrNum++;
508508
continue;

0 commit comments

Comments
 (0)