Skip to content

Commit 8e37fb4

Browse files
authored
[fix](cloud) fix routine load job stuck if commit transaction failed (#40539)
During the before-commit stage, a write lock is acquired on the routine load job. If the commit transaction then fails, the thread returns immediately and the write lock is never released, which causes the job to get stuck.
1 parent f21eb48 commit 8e37fb4

File tree

2 files changed

+18
-2
lines changed

2 files changed

+18
-2
lines changed

fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java

+17-1
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,17 @@ private void commitTransaction(long dbId, List<Table> tableList, long transactio
505505
}
506506

507507
final CommitTxnRequest commitTxnRequest = builder.build();
508-
commitTxn(commitTxnRequest, transactionId, is2PC, dbId, tableList);
508+
try {
509+
commitTxn(commitTxnRequest, transactionId, is2PC, dbId, tableList);
510+
} catch (UserException e) {
511+
// For routine load, it is necessary to release the write lock when commit transaction fails,
512+
// otherwise it will cause the lock added in beforeCommitted to not be released.
513+
if (txnCommitAttachment != null && txnCommitAttachment instanceof RLTaskTxnCommitAttachment) {
514+
RLTaskTxnCommitAttachment rlTaskTxnCommitAttachment = (RLTaskTxnCommitAttachment) txnCommitAttachment;
515+
Env.getCurrentEnv().getRoutineLoadManager().getJob(rlTaskTxnCommitAttachment.getJobId()).writeUnlock();
516+
}
517+
throw e;
518+
}
509519
}
510520

511521
private void commitTxn(CommitTxnRequest commitTxnRequest, long transactionId, boolean is2PC, long dbId,
@@ -1037,6 +1047,12 @@ public void abortTransaction(Long dbId, Long transactionId, String reason,
10371047
Preconditions.checkNotNull(abortTxnResponse.getStatus());
10381048
} catch (RpcException e) {
10391049
LOG.warn("abortTxn failed, transactionId:{}, Exception", transactionId, e);
1050+
// For routine load, it is necessary to release the write lock when abort transaction fails,
1051+
// otherwise it will cause the lock added in beforeAborted to not be released.
1052+
if (txnCommitAttachment != null && txnCommitAttachment instanceof RLTaskTxnCommitAttachment) {
1053+
RLTaskTxnCommitAttachment rlTaskTxnCommitAttachment = (RLTaskTxnCommitAttachment) txnCommitAttachment;
1054+
Env.getCurrentEnv().getRoutineLoadManager().getJob(rlTaskTxnCommitAttachment.getJobId()).writeUnlock();
1055+
}
10401056
throw new UserException("abortTxn failed, errMsg:" + e.getMessage());
10411057
}
10421058

fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ protected void writeLock() {
485485
lock.writeLock().lock();
486486
}
487487

488-
protected void writeUnlock() {
488+
public void writeUnlock() {
489489
lock.writeLock().unlock();
490490
}
491491

0 commit comments

Comments
 (0)