Skip to content

Commit 4c22f2c

Browse files
authored
[Improvement](schema scan) Use async scanner for schema scanners (#38403)
1 parent 3ca9315 commit 4c22f2c

File tree

54 files changed

+163
-69
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+163
-69
lines changed

be/src/exec/schema_scanner.cpp

+68-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,10 @@
5252
#include "exec/schema_scanner/schema_workload_groups_scanner.h"
5353
#include "exec/schema_scanner/schema_workload_sched_policy_scanner.h"
5454
#include "olap/hll.h"
55+
#include "pipeline/dependency.h"
5556
#include "runtime/define_primitive_type.h"
57+
#include "runtime/fragment_mgr.h"
58+
#include "runtime/types.h"
5659
#include "util/string_util.h"
5760
#include "util/types.h"
5861
#include "vec/columns/column.h"
@@ -66,6 +69,7 @@
6669
#include "vec/core/column_with_type_and_name.h"
6770
#include "vec/core/types.h"
6871
#include "vec/data_types/data_type.h"
72+
#include "vec/data_types/data_type_factory.hpp"
6973

7074
namespace doris {
7175
class ObjectPool;
@@ -86,7 +90,60 @@ Status SchemaScanner::start(RuntimeState* state) {
8690
return Status::OK();
8791
}
8892

89-
Status SchemaScanner::get_next_block(vectorized::Block* block, bool* eos) {
93+
// Moves the rows buffered in `_data_block` by the background fetch into `block`.
//
// Returns an InternalError when no data block has been created yet, or the
// error recorded in `_scanner_status`. On success `*eos` receives the `_eos`
// flag; while it is false, the next asynchronous fetch is scheduled before
// returning.
Status SchemaScanner::get_next_block(RuntimeState* state, vectorized::Block* block, bool* eos) {
    if (!_data_block) {
        return Status::InternalError("No data left!");
    }
    DCHECK(_async_thread_running == false);
    RETURN_IF_ERROR(_scanner_status.status());

    // Append every buffered row to the matching column of the caller's block.
    const size_t column_count = block->columns();
    for (size_t col = 0; col != column_count; ++col) {
        auto target = std::move(*block->get_by_position(col).column).mutate();
        const auto& source = *_data_block->get_by_position(col).column;
        target->insert_range_from(source, 0, _data_block->rows());
    }
    // The buffer is reused by the next fetch, so drop the rows just handed over.
    _data_block->clear_column_data();

    const bool finished = _eos;
    *eos = finished;
    if (finished) {
        return Status::OK();
    }
    RETURN_IF_ERROR(get_next_block_async(state));
    return Status::OK();
}
112+
113+
// Schedules one background fetch of the next data block on the fragment
// manager's thread pool.
//
// `_dependency` is blocked before the job is submitted and set ready by the
// worker once the fetch has finished. `_finish_dependency` is blocked by the
// worker and released when it reaches end-of-stream or records an error, i.e.
// when no further fetch will be scheduled.
//
// Returns the submission error if the thread pool rejects the job; fetch errors
// are recorded in `_scanner_status` and surface from get_next_block().
Status SchemaScanner::get_next_block_async(RuntimeState* state) {
    // Block once, before submission; the worker does not need to block it again.
    _dependency->block();
    auto task_ctx = state->get_task_execution_context();
    RETURN_IF_ERROR(ExecEnv::GetInstance()->fragment_mgr()->get_thread_pool()->submit_func(
            [this, task_ctx, state]() {
                auto task_lock = task_ctx.lock();
                if (task_lock == nullptr) {
                    // NOTE(review): the task execution context has expired, so this
                    // scanner and `state` are presumably already destroyed. Do not
                    // touch any member here (that includes recording a status).
                    return;
                }
                DCHECK(_async_thread_running == false);
                SCOPED_ATTACH_TASK(state);
                _async_thread_running = true;
                _finish_dependency->block();
                if (!_opened) {
                    _data_block = vectorized::Block::create_unique();
                    _init_block(_data_block.get());
                    _scanner_status.update(start(state));
                    _opened = true;
                }
                bool eos = false;
                // Never read from a scanner whose start() failed.
                if (_scanner_status.status().ok()) {
                    _scanner_status.update(get_next_block_internal(_data_block.get(), &eos));
                }
                _eos = eos;
                // After eos or an error get_next_block() schedules no further
                // fetch, so the finish dependency must not stay blocked.
                const bool no_more_fetches = eos || !_scanner_status.status().ok();
                _async_thread_running = false;
                _dependency->set_ready();
                if (no_more_fetches) {
                    _finish_dependency->set_ready();
                }
            }));
    return Status::OK();
}
145+
146+
Status SchemaScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
90147
if (!_is_init) {
91148
return Status::InternalError("used before initialized.");
92149
}
@@ -179,6 +236,16 @@ std::unique_ptr<SchemaScanner> SchemaScanner::create(TSchemaTableType::type type
179236
}
180237
}
181238

239+
// Appends one empty column to `src_block` for every entry returned by
// get_column_desc(), using the entry's type and name.
//
// NOTE(review): the `true` passed to create_data_type() presumably requests a
// nullable data type; confirm against DataTypeFactory.
void SchemaScanner::_init_block(vectorized::Block* src_block) {
    // Range-for avoids the signed/unsigned comparison of an `int` index with size().
    for (const auto& column_desc : get_column_desc()) {
        TypeDescriptor descriptor(column_desc.type);
        auto data_type = vectorized::DataTypeFactory::instance().create_data_type(descriptor, true);
        src_block->insert(vectorized::ColumnWithTypeAndName(data_type->create_column(), data_type,
                                                            column_desc.name));
    }
}
248+
182249
Status SchemaScanner::fill_dest_column_for_range(vectorized::Block* block, size_t pos,
183250
const std::vector<void*>& datas) {
184251
const ColumnDesc& col_desc = _columns[pos];

be/src/exec/schema_scanner.h

+23-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <stddef.h>
2323
#include <stdint.h>
2424

25+
#include <condition_variable>
2526
#include <memory>
2627
#include <string>
2728
#include <vector>
@@ -43,6 +44,10 @@ namespace vectorized {
4344
class Block;
4445
}
4546

47+
namespace pipeline {
48+
class Dependency;
49+
}
50+
4651
struct SchemaScannerCommonParam {
4752
SchemaScannerCommonParam()
4853
: db(nullptr),
@@ -94,15 +99,23 @@ class SchemaScanner {
9499

95100
// init object need information, schema etc.
96101
virtual Status init(SchemaScannerParam* param, ObjectPool* pool);
102+
Status get_next_block(RuntimeState* state, vectorized::Block* block, bool* eos);
97103
// Start to work
98104
virtual Status start(RuntimeState* state);
99-
virtual Status get_next_block(vectorized::Block* block, bool* eos);
105+
virtual Status get_next_block_internal(vectorized::Block* block, bool* eos);
100106
const std::vector<ColumnDesc>& get_column_desc() const { return _columns; }
101107
// factory function
102108
static std::unique_ptr<SchemaScanner> create(TSchemaTableType::type type);
103109
TSchemaTableType::type type() const { return _schema_table_type; }
110+
// Stores the two pipeline dependencies used by the asynchronous fetch.
// `dep` is blocked while a fetch is pending and set ready when it completes;
// `fin_dep` is blocked by the fetch worker and set ready once it observes
// end-of-stream. Both must be set before get_next_block_async() is called,
// since that function dereferences them unconditionally.
void set_dependency(std::shared_ptr<pipeline::Dependency> dep,
111+
std::shared_ptr<pipeline::Dependency> fin_dep) {
112+
_dependency = dep;
113+
_finish_dependency = fin_dep;
114+
}
115+
Status get_next_block_async(RuntimeState* state);
104116

105117
protected:
118+
void _init_block(vectorized::Block* src_block);
106119
Status fill_dest_column_for_range(vectorized::Block* block, size_t pos,
107120
const std::vector<void*>& datas);
108121

@@ -125,6 +138,15 @@ class SchemaScanner {
125138
RuntimeProfile::Counter* _get_table_timer = nullptr;
126139
RuntimeProfile::Counter* _get_describe_timer = nullptr;
127140
RuntimeProfile::Counter* _fill_block_timer = nullptr;
141+
142+
std::shared_ptr<pipeline::Dependency> _dependency = nullptr;
143+
std::shared_ptr<pipeline::Dependency> _finish_dependency = nullptr;
144+
145+
std::unique_ptr<vectorized::Block> _data_block;
146+
AtomicStatus _scanner_status;
147+
std::atomic<bool> _eos = false;
148+
std::atomic<bool> _opened = false;
149+
std::atomic<bool> _async_thread_running = false;
128150
};
129151

130152
} // namespace doris

be/src/exec/schema_scanner/schema_active_queries_scanner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ Status SchemaActiveQueriesScanner::_get_active_queries_block_from_fe() {
137137
return Status::OK();
138138
}
139139

140-
Status SchemaActiveQueriesScanner::get_next_block(vectorized::Block* block, bool* eos) {
140+
Status SchemaActiveQueriesScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
141141
if (!_is_init) {
142142
return Status::InternalError("Used before initialized.");
143143
}

be/src/exec/schema_scanner/schema_active_queries_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class SchemaActiveQueriesScanner : public SchemaScanner {
3636
~SchemaActiveQueriesScanner() override;
3737

3838
Status start(RuntimeState* state) override;
39-
Status get_next_block(vectorized::Block* block, bool* eos) override;
39+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
4040

4141
static std::vector<SchemaScanner::ColumnDesc> _s_tbls_columns;
4242

be/src/exec/schema_scanner/schema_backend_active_tasks.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ Status SchemaBackendActiveTasksScanner::start(RuntimeState* state) {
5151
return Status::OK();
5252
}
5353

54-
Status SchemaBackendActiveTasksScanner::get_next_block(vectorized::Block* block, bool* eos) {
54+
Status SchemaBackendActiveTasksScanner::get_next_block_internal(vectorized::Block* block,
55+
bool* eos) {
5556
if (!_is_init) {
5657
return Status::InternalError("Used before initialized.");
5758
}

be/src/exec/schema_scanner/schema_backend_active_tasks.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class SchemaBackendActiveTasksScanner : public SchemaScanner {
3636
~SchemaBackendActiveTasksScanner() override;
3737

3838
Status start(RuntimeState* state) override;
39-
Status get_next_block(vectorized::Block* block, bool* eos) override;
39+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
4040

4141
static std::vector<SchemaScanner::ColumnDesc> _s_tbls_columns;
4242

be/src/exec/schema_scanner/schema_charsets_scanner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ SchemaCharsetsScanner::SchemaCharsetsScanner()
4848

4949
SchemaCharsetsScanner::~SchemaCharsetsScanner() {}
5050

51-
Status SchemaCharsetsScanner::get_next_block(vectorized::Block* block, bool* eos) {
51+
Status SchemaCharsetsScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
5252
if (!_is_init) {
5353
return Status::InternalError("call this before initial.");
5454
}

be/src/exec/schema_scanner/schema_charsets_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class SchemaCharsetsScanner : public SchemaScanner {
3636
SchemaCharsetsScanner();
3737
~SchemaCharsetsScanner() override;
3838

39-
Status get_next_block(vectorized::Block* block, bool* eos) override;
39+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
4040

4141
private:
4242
struct CharsetStruct {

be/src/exec/schema_scanner/schema_collations_scanner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ SchemaCollationsScanner::SchemaCollationsScanner()
5050

5151
SchemaCollationsScanner::~SchemaCollationsScanner() {}
5252

53-
Status SchemaCollationsScanner::get_next_block(vectorized::Block* block, bool* eos) {
53+
Status SchemaCollationsScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
5454
if (!_is_init) {
5555
return Status::InternalError("call this before initial.");
5656
}

be/src/exec/schema_scanner/schema_collations_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class SchemaCollationsScanner : public SchemaScanner {
3636
SchemaCollationsScanner();
3737
~SchemaCollationsScanner() override;
3838

39-
Status get_next_block(vectorized::Block* block, bool* eos) override;
39+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
4040

4141
private:
4242
struct CollationStruct {

be/src/exec/schema_scanner/schema_columns_scanner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ Status SchemaColumnsScanner::_get_new_table() {
347347
return Status::OK();
348348
}
349349

350-
Status SchemaColumnsScanner::get_next_block(vectorized::Block* block, bool* eos) {
350+
Status SchemaColumnsScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
351351
if (!_is_init) {
352352
return Status::InternalError("use this class before inited.");
353353
}

be/src/exec/schema_scanner/schema_columns_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class SchemaColumnsScanner : public SchemaScanner {
3838
SchemaColumnsScanner();
3939
~SchemaColumnsScanner() override;
4040
Status start(RuntimeState* state) override;
41-
Status get_next_block(vectorized::Block* block, bool* eos) override;
41+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
4242

4343
private:
4444
Status _get_new_table();

be/src/exec/schema_scanner/schema_dummy_scanner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ Status SchemaDummyScanner::start(RuntimeState* state) {
4040
return Status::OK();
4141
}
4242

43-
Status SchemaDummyScanner::get_next_block(vectorized::Block* block, bool* eos) {
43+
// The dummy scanner produces no rows: it leaves `block` untouched, reports
// end-of-stream through `*eos` on every call, and always returns OK.
Status SchemaDummyScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
4444
*eos = true;
4545
return Status::OK();
4646
}

be/src/exec/schema_scanner/schema_dummy_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class SchemaDummyScanner : public SchemaScanner {
3333
SchemaDummyScanner();
3434
~SchemaDummyScanner() override;
3535
Status start(RuntimeState* state = nullptr) override;
36-
Status get_next_block(vectorized::Block* block, bool* eos) override;
36+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
3737
};
3838

3939
} // namespace doris

be/src/exec/schema_scanner/schema_files_scanner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ Status SchemaFilesScanner::start(RuntimeState* state) {
113113
return Status::OK();
114114
}
115115

116-
Status SchemaFilesScanner::get_next_block(vectorized::Block* block, bool* eos) {
116+
Status SchemaFilesScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
117117
if (!_is_init) {
118118
return Status::InternalError("Used before initialized.");
119119
}

be/src/exec/schema_scanner/schema_files_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class SchemaFilesScanner : public SchemaScanner {
3838
~SchemaFilesScanner() override;
3939

4040
Status start(RuntimeState* state) override;
41-
Status get_next_block(vectorized::Block* block, bool* eos) override;
41+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
4242

4343
int _db_index;
4444
int _table_index;

be/src/exec/schema_scanner/schema_metadata_name_ids_scanner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ Status SchemaMetadataNameIdsScanner::_fill_block_impl(vectorized::Block* block)
225225
return Status::OK();
226226
}
227227

228-
Status SchemaMetadataNameIdsScanner::get_next_block(vectorized::Block* block, bool* eos) {
228+
Status SchemaMetadataNameIdsScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
229229
if (!_is_init) {
230230
return Status::InternalError("Used before initialized.");
231231
}

be/src/exec/schema_scanner/schema_metadata_name_ids_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class SchemaMetadataNameIdsScanner : public SchemaScanner {
3939
~SchemaMetadataNameIdsScanner() override;
4040

4141
Status start(RuntimeState* state) override;
42-
Status get_next_block(vectorized::Block* block, bool* eos) override;
42+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
4343

4444
private:
4545
Status _get_new_table();

be/src/exec/schema_scanner/schema_partitions_scanner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ Status SchemaPartitionsScanner::start(RuntimeState* state) {
101101
return Status::OK();
102102
}
103103

104-
Status SchemaPartitionsScanner::get_next_block(vectorized::Block* block, bool* eos) {
104+
Status SchemaPartitionsScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
105105
if (!_is_init) {
106106
return Status::InternalError("Used before initialized.");
107107
}

be/src/exec/schema_scanner/schema_partitions_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class SchemaPartitionsScanner : public SchemaScanner {
3838
~SchemaPartitionsScanner() override;
3939

4040
Status start(RuntimeState* state) override;
41-
Status get_next_block(vectorized::Block* block, bool* eos) override;
41+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
4242

4343
int _db_index;
4444
int _table_index;

be/src/exec/schema_scanner/schema_processlist_scanner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ Status SchemaProcessListScanner::start(RuntimeState* state) {
6363
return Status::OK();
6464
}
6565

66-
Status SchemaProcessListScanner::get_next_block(vectorized::Block* block, bool* eos) {
66+
Status SchemaProcessListScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
6767
if (!_is_init) {
6868
return Status::InternalError("call this before initial.");
6969
}

be/src/exec/schema_scanner/schema_processlist_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class SchemaProcessListScanner : public SchemaScanner {
4040
~SchemaProcessListScanner() override;
4141

4242
Status start(RuntimeState* state) override;
43-
Status get_next_block(vectorized::Block* block, bool* eos) override;
43+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
4444

4545
static std::vector<SchemaScanner::ColumnDesc> _s_processlist_columns;
4646

be/src/exec/schema_scanner/schema_profiling_scanner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ Status SchemaProfilingScanner::start(RuntimeState* state) {
8888
return Status::OK();
8989
}
9090

91-
Status SchemaProfilingScanner::get_next_block(vectorized::Block* block, bool* eos) {
91+
Status SchemaProfilingScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
9292
if (!_is_init) {
9393
return Status::InternalError("Used before initialized.");
9494
}

be/src/exec/schema_scanner/schema_profiling_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class SchemaProfilingScanner : public SchemaScanner {
3838
~SchemaProfilingScanner() override;
3939

4040
Status start(RuntimeState* state) override;
41-
Status get_next_block(vectorized::Block* block, bool* eos) override;
41+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
4242

4343
static std::vector<SchemaScanner::ColumnDesc> _s_tbls_columns;
4444
};

be/src/exec/schema_scanner/schema_routine_scanner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ Status SchemaRoutinesScanner::get_block_from_fe() {
141141
return Status::OK();
142142
}
143143

144-
Status SchemaRoutinesScanner::get_next_block(vectorized::Block* block, bool* eos) {
144+
Status SchemaRoutinesScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
145145
if (!_is_init) {
146146
return Status::InternalError("Used before initialized.");
147147
}

be/src/exec/schema_scanner/schema_routine_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class SchemaRoutinesScanner : public SchemaScanner {
3636
~SchemaRoutinesScanner() override = default;
3737

3838
Status start(RuntimeState* state) override;
39-
Status get_next_block(vectorized::Block* block, bool* eos) override;
39+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
4040

4141
static std::vector<SchemaScanner::ColumnDesc> _s_tbls_columns;
4242

be/src/exec/schema_scanner/schema_rowsets_scanner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ Status SchemaRowsetsScanner::_get_all_rowsets() {
9797
return Status::OK();
9898
}
9999

100-
Status SchemaRowsetsScanner::get_next_block(vectorized::Block* block, bool* eos) {
100+
Status SchemaRowsetsScanner::get_next_block_internal(vectorized::Block* block, bool* eos) {
101101
if (!_is_init) {
102102
return Status::InternalError("Used before initialized.");
103103
}

be/src/exec/schema_scanner/schema_rowsets_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class SchemaRowsetsScanner : public SchemaScanner {
4040
~SchemaRowsetsScanner() override = default;
4141

4242
Status start(RuntimeState* state) override;
43-
Status get_next_block(vectorized::Block* block, bool* eos) override;
43+
Status get_next_block_internal(vectorized::Block* block, bool* eos) override;
4444

4545
private:
4646
Status _get_all_rowsets();

0 commit comments

Comments
 (0)