Skip to content

Commit b7c973e

Browse files
authored
[fix](scheduler) Fix invalid access after free (apache#48168)
WRITE of size 1 at 0x6160007e86f0 thread T1983 (Pipe_normal [wo) #0 0x55fc8065b975 in std::__atomic_base<bool>::store(bool, std::memory_order) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/atomic_base.h:457:2 #1 0x55fc8065b975 in std::__atomic_base<bool>::operator=(bool) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/atomic_base.h:349:2 #2 0x55fc8065b975 in std::atomic<bool>::operator=(bool) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/atomic:80:22 #3 0x55fc8065b975 in doris::pipeline::PipelineTask::set_running(bool) /root/doris/be/src/pipeline/pipeline_task.h:192:47 #4 0x55fc8065b975 in doris::pipeline::TaskScheduler::_do_work(int)::$_0::operator()() const /root/doris/be/src/pipeline/task_scheduler.cpp:121:23 #5 0x55fc8065b975 in doris::Defer<doris::pipeline::TaskScheduler::_do_work(int)::$_0>::~Defer() /root/doris/be/src/util/defer_op.h:37:16 #6 0x55fc8065b975 in doris::pipeline::TaskScheduler::_do_work(int) /root/doris/be/src/pipeline/task_scheduler.cpp:162:5 #7 0x55fc4c57cd19 in doris::ThreadPool::dispatch_thread() /root/doris/be/src/util/threadpool.cpp:608:24 #8 0x55fc4c55395e in std::function<void ()>::operator()() const /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560:9 #9 0x55fc4c55395e in doris::Thread::supervise_thread(void*) /root/doris/be/src/util/thread.cpp:498:5 #10 0x7f9ee3d25608 in start_thread /build/glibc-SzIz7B/glibc-2.31/nptl/pthread_create.c:477:8 #11 0x7f9ee3fd2132 in __clone /build/glibc-SzIz7B/glibc-2.31/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:95 0x6160007e86f0 is located 624 bytes inside of 632-byte region [0x6160007e8480,0x6160007e86f8) freed by thread T1981 (Pipe_normal [wo) here: #0 0x55fc47aa680d in operator delete(void*) 
(/mnt/ssd01/pipline/OpenSourceDoris/clusterEnv/P0/Cluster0/be/lib/doris_be+0x3376e80d) (BuildId: 865149e62959581e) #1 0x55fc8059db84 in std::default_delete<doris::pipeline::PipelineTask>::operator()(doris::pipeline::PipelineTask*) const /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:85:2 apache#2 0x55fc8059db84 in std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >::~unique_ptr() /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:361:4 apache#3 0x55fc8059db84 in void std::destroy_at<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > >(std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >*) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_construct.h:88:15 apache#4 0x55fc8059db84 in void std::_Destroy<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > >(std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >*) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_construct.h:138:7 apache#5 0x55fc8059db84 in void std::_Destroy_aux<false>::__destroy<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >*>(std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >*, std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >*) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_construct.h:152:6 apache#6 0x55fc8059db84 in void std::_Destroy<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> 
>*>(std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >*, std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >*) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_construct.h:184:7 apache#7 0x55fc8059db84 in void std::_Destroy<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >*, std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > >(std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >*, std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >*, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > >&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/alloc_traits.h:746:7 apache#8 0x55fc8059db84 in std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >::~vector() /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_vector.h:680:2 apache#9 0x55fc8052571c in void std::destroy_at<std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > > >(std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >*) 
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_construct.h:88:15 apache#10 0x55fc8052571c in void std::_Destroy<std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > > >(std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >*) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_construct.h:138:7 apache#11 0x55fc8052571c in void std::_Destroy_aux<false>::__destroy<std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >*>(std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >*, std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >*) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_construct.h:152:6 apache#12 0x55fc8052571c in void std::_Destroy<std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >*>(std::vector<std::unique_ptr<doris::pipeline::PipelineTask, 
std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >*, std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >*) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_construct.h:184:7 apache#13 0x55fc8052571c in void std::_Destroy<std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >*, std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > > >(std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >*, std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >*, std::allocator<std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > > >&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/alloc_traits.h:746:7 apache#14 0x55fc8052571c in std::vector<std::vector<std::unique_ptr<doris::pipeline::PipelineTask, 
std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >, std::allocator<std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > > > >::_M_erase_at_end(std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >*) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_vector.h:1796:6 apache#15 0x55fc8052571c in std::vector<std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > >, std::allocator<std::vector<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> >, std::allocator<std::unique_ptr<doris::pipeline::PipelineTask, std::default_delete<doris::pipeline::PipelineTask> > > > > >::clear() /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/stl_vector.h:1499:9 apache#16 0x55fc8052571c in doris::pipeline::PipelineFragmentContext::~PipelineFragmentContext() /root/doris/be/src/pipeline/pipeline_fragment_context.cpp:142:12 apache#17 0x55fc47ad30cc in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:168:6 apache#18 0x55fc80658d57 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count() 
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:702:11 #19 0x55fc80658d57 in std::__shared_ptr<doris::TaskExecutionContext, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr() /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:1149:31 #20 0x55fc80658d57 in doris::pipeline::close_task(doris::pipeline::PipelineTask*, doris::Status) /root/doris/be/src/pipeline/task_scheduler.cpp:100:1 #21 0x55fc8065aa17 in doris::pipeline::TaskScheduler::_do_work(int) /root/doris/be/src/pipeline/task_scheduler.cpp:160:36 #22 0x55fc4c57cd19 in doris::ThreadPool::dispatch_thread() /root/doris/be/src/util/threadpool.cpp:608:24 #23 0x55fc4c55395e in std::function<void ()>::operator()() const /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560:9 #24 0x55fc4c55395e in doris::Thread::supervise_thread(void*) /root/doris/be/src/util/thread.cpp:498:5 #25 0x7f9ee3d25608 in start_thread /build/glibc-SzIz7B/glibc-2.31/nptl/pthread_create.c:477:8
1 parent 164d05e commit b7c973e

File tree

3 files changed

+17
-18
lines changed

3 files changed

+17
-18
lines changed

be/src/pipeline/pipeline_fragment_context.cpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -1753,7 +1753,7 @@ void PipelineFragmentContext::_close_fragment_instance() {
17531753
std::dynamic_pointer_cast<PipelineFragmentContext>(shared_from_this()));
17541754
}
17551755

1756-
bool PipelineFragmentContext::decrement_running_task(PipelineId pipeline_id) {
1756+
void PipelineFragmentContext::decrement_running_task(PipelineId pipeline_id) {
17571757
// If all tasks of this pipeline have been closed, the upstream tasks are no longer needed, so we just make them runnable here.
17581758
DCHECK(_pip_id_to_pipeline.contains(pipeline_id));
17591759
if (_pip_id_to_pipeline[pipeline_id]->close_task()) {
@@ -1767,9 +1767,7 @@ bool PipelineFragmentContext::decrement_running_task(PipelineId pipeline_id) {
17671767
++_closed_tasks;
17681768
if (_closed_tasks == _total_tasks) {
17691769
_close_fragment_instance();
1770-
return true;
17711770
}
1772-
return false;
17731771
}
17741772

17751773
Status PipelineFragmentContext::send_report(bool done) {

be/src/pipeline/pipeline_fragment_context.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ class PipelineFragmentContext : public TaskExecutionContext {
100100

101101
[[nodiscard]] int get_fragment_id() const { return _fragment_id; }
102102

103-
bool decrement_running_task(PipelineId pipeline_id);
103+
void decrement_running_task(PipelineId pipeline_id);
104104

105105
Status send_report(bool);
106106

be/src/pipeline/task_scheduler.cpp

+15-14
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,12 @@ bool close_task(PipelineTask* task, Status exec_status) {
8888
print_id(task->query_context()->query_id()),
8989
exec_status.to_string());
9090
}
91-
// decrement_running_task may delete fragment context and will core in some defer
92-
// code, because the defer code will access fragment context itself.
93-
auto lock_for_context = task->fragment_context()->shared_from_this();
9491
Status status = task->close(exec_status);
9592
if (!status.ok()) {
9693
task->fragment_context()->cancel(status);
9794
}
9895
task->finalize();
99-
return task->fragment_context()->decrement_running_task(task->pipeline_id());
96+
return true;
10097
}
10198

10299
void TaskScheduler::_do_work(int index) {
@@ -114,10 +111,20 @@ void TaskScheduler::_do_work(int index) {
114111
}
115112
task->log_detail_if_need();
116113
task->set_running(true);
117-
bool fragment_is_finished = false;
114+
bool eos = false;
115+
auto status = Status::OK();
118116
Defer task_running_defer {[&]() {
119117
// If the fragment is finished, the fragment context will be destroyed along with all tasks in it.
120-
if (!fragment_is_finished) {
118+
if (eos || !status.ok()) {
119+
// decrement_running_task may destroy the fragment context, which would crash (core dump) in
120+
// deferred code that still accesses the fragment context, so hold a reference to it here.
121+
auto lock_for_context = task->fragment_context()->shared_from_this();
122+
bool close = close_task(task, status);
123+
task->set_running(false);
124+
if (close) {
125+
task->fragment_context()->decrement_running_task(task->pipeline_id());
126+
}
127+
} else {
121128
task->set_running(false);
122129
}
123130
}};
@@ -127,12 +134,10 @@ void TaskScheduler::_do_work(int index) {
127134

128135
// Close task if canceled
129136
if (canceled) {
130-
fragment_is_finished = close_task(task, fragment_ctx->get_query_ctx()->exec_status());
137+
status = fragment_ctx->get_query_ctx()->exec_status();
138+
DCHECK(!status.ok());
131139
continue;
132140
}
133-
134-
bool eos = false;
135-
auto status = Status::OK();
136141
task->set_core_id(index);
137142

138143
// Main logics of execution
@@ -155,10 +160,6 @@ void TaskScheduler::_do_work(int index) {
155160
} else { status = task->execute(&eos); },
156161
status);
157162
fragment_ctx->trigger_report_if_necessary();
158-
159-
if (eos || !status.ok()) {
160-
fragment_is_finished = close_task(task, status);
161-
}
162163
}
163164
}
164165

0 commit comments

Comments
 (0)