Skip to content

Commit 8ec52e4

Browse files
authored
[fix](cloud-mow) Fix the issue of missing and removing some old version delete bitmap (#44448)
Problem Summary: PR #40204 added support for removing old delete bitmaps; however, it doesn't consider the boundary, which causes some delete bitmaps that should be removed on the BE to be missed. It only affects the local delete bitmap; the delete bitmap stored in FDB is correct, and before this PR these missed delete bitmaps could only be deleted by the next base compaction. Pick PR: #44300
1 parent a565672 commit 8ec52e4

9 files changed

+252
-76
lines changed

be/src/cloud/cloud_cumulative_compaction.cpp

+1-4
Original file line numberDiff line numberDiff line change
@@ -393,12 +393,9 @@ Status CloudCumulativeCompaction::process_old_version_delete_bitmap() {
393393
rowset->rowset_id().to_string();
394394
DeleteBitmap::BitmapKey start {rowset->rowset_id(), seg_id, 0};
395395
DeleteBitmap::BitmapKey end {rowset->rowset_id(), seg_id, pre_max_version};
396-
DeleteBitmap::BitmapKey before_end {rowset->rowset_id(), seg_id,
397-
pre_max_version - 1};
398396
auto d = _tablet->tablet_meta()->delete_bitmap().get_agg(
399397
{rowset->rowset_id(), seg_id, pre_max_version});
400-
to_remove_vec.emplace_back(
401-
std::make_tuple(_tablet->tablet_id(), start, before_end));
398+
to_remove_vec.emplace_back(std::make_tuple(_tablet->tablet_id(), start, end));
402399
if (d->isEmpty()) {
403400
continue;
404401
}

be/src/cloud/cloud_delete_bitmap_action.cpp

+56-5
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <thread>
3434
#include <utility>
3535

36+
#include "cloud/cloud_meta_mgr.h"
3637
#include "cloud/cloud_tablet.h"
3738
#include "cloud/cloud_tablet_mgr.h"
3839
#include "common/logging.h"
@@ -78,8 +79,8 @@ static Status _check_param(HttpRequest* req, uint64_t* tablet_id) {
7879
return Status::OK();
7980
}
8081

81-
Status CloudDeleteBitmapAction::_handle_show_delete_bitmap_count(HttpRequest* req,
82-
std::string* json_result) {
82+
Status CloudDeleteBitmapAction::_handle_show_local_delete_bitmap_count(HttpRequest* req,
83+
std::string* json_result) {
8384
uint64_t tablet_id = 0;
8485
// check & retrieve tablet_id from req if it contains
8586
RETURN_NOT_OK_STATUS_WITH_WARN(_check_param(req, &tablet_id), "check param failed");
@@ -95,7 +96,49 @@ Status CloudDeleteBitmapAction::_handle_show_delete_bitmap_count(HttpRequest* re
9596
auto count = tablet->tablet_meta()->delete_bitmap().get_delete_bitmap_count();
9697
auto cardinality = tablet->tablet_meta()->delete_bitmap().cardinality();
9798
auto size = tablet->tablet_meta()->delete_bitmap().get_size();
98-
LOG(INFO) << "show_delete_bitmap_count,tablet_id=" << tablet_id << ",count=" << count
99+
LOG(INFO) << "show_local_delete_bitmap_count,tablet_id=" << tablet_id << ",count=" << count
100+
<< ",cardinality=" << cardinality << ",size=" << size;
101+
102+
rapidjson::Document root;
103+
root.SetObject();
104+
root.AddMember("delete_bitmap_count", count, root.GetAllocator());
105+
root.AddMember("cardinality", cardinality, root.GetAllocator());
106+
root.AddMember("size", size, root.GetAllocator());
107+
108+
// to json string
109+
rapidjson::StringBuffer strbuf;
110+
rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(strbuf);
111+
root.Accept(writer);
112+
*json_result = std::string(strbuf.GetString());
113+
114+
return Status::OK();
115+
}
116+
117+
Status CloudDeleteBitmapAction::_handle_show_ms_delete_bitmap_count(HttpRequest* req,
118+
std::string* json_result) {
119+
uint64_t tablet_id = 0;
120+
// check & retrieve tablet_id from req if it contains
121+
RETURN_NOT_OK_STATUS_WITH_WARN(_check_param(req, &tablet_id), "check param failed");
122+
if (tablet_id == 0) {
123+
return Status::InternalError("check param failed: missing tablet_id");
124+
}
125+
TabletMetaSharedPtr tablet_meta;
126+
auto st = _engine.meta_mgr().get_tablet_meta(tablet_id, &tablet_meta);
127+
if (!st.ok()) {
128+
LOG(WARNING) << "failed to get_tablet_meta tablet=" << tablet_id
129+
<< ", st=" << st.to_string();
130+
return st;
131+
}
132+
auto tablet = std::make_shared<CloudTablet>(_engine, std::move(tablet_meta));
133+
st = _engine.meta_mgr().sync_tablet_rowsets(tablet.get(), false, true, true);
134+
if (!st.ok()) {
135+
LOG(WARNING) << "failed to sync tablet=" << tablet_id << ", st=" << st;
136+
return st;
137+
}
138+
auto count = tablet->tablet_meta()->delete_bitmap().get_delete_bitmap_count();
139+
auto cardinality = tablet->tablet_meta()->delete_bitmap().cardinality();
140+
auto size = tablet->tablet_meta()->delete_bitmap().get_size();
141+
LOG(INFO) << "show_ms_delete_bitmap_count,tablet_id=" << tablet_id << ",count=" << count
99142
<< ",cardinality=" << cardinality << ",size=" << size;
100143

101144
rapidjson::Document root;
@@ -115,9 +158,17 @@ Status CloudDeleteBitmapAction::_handle_show_delete_bitmap_count(HttpRequest* re
115158

116159
void CloudDeleteBitmapAction::handle(HttpRequest* req) {
117160
req->add_output_header(HttpHeaders::CONTENT_TYPE, HEADER_JSON.data());
118-
if (_delete_bitmap_action_type == DeleteBitmapActionType::COUNT_INFO) {
161+
if (_delete_bitmap_action_type == DeleteBitmapActionType::COUNT_LOCAL) {
162+
std::string json_result;
163+
Status st = _handle_show_local_delete_bitmap_count(req, &json_result);
164+
if (!st.ok()) {
165+
HttpChannel::send_reply(req, HttpStatus::OK, st.to_json());
166+
} else {
167+
HttpChannel::send_reply(req, HttpStatus::OK, json_result);
168+
}
169+
} else if (_delete_bitmap_action_type == DeleteBitmapActionType::COUNT_MS) {
119170
std::string json_result;
120-
Status st = _handle_show_delete_bitmap_count(req, &json_result);
171+
Status st = _handle_show_ms_delete_bitmap_count(req, &json_result);
121172
if (!st.ok()) {
122173
HttpChannel::send_reply(req, HttpStatus::OK, st.to_json());
123174
} else {

be/src/cloud/cloud_delete_bitmap_action.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class HttpRequest;
3131

3232
class ExecEnv;
3333

34-
enum class DeleteBitmapActionType { COUNT_INFO = 1 };
34+
enum class DeleteBitmapActionType { COUNT_LOCAL = 1, COUNT_MS = 2 };
3535

3636
/// This action is used for viewing the delete bitmap status
3737
class CloudDeleteBitmapAction : public HttpHandlerWithAuth {
@@ -45,7 +45,8 @@ class CloudDeleteBitmapAction : public HttpHandlerWithAuth {
4545
void handle(HttpRequest* req) override;
4646

4747
private:
48-
Status _handle_show_delete_bitmap_count(HttpRequest* req, std::string* json_result);
48+
Status _handle_show_local_delete_bitmap_count(HttpRequest* req, std::string* json_result);
49+
Status _handle_show_ms_delete_bitmap_count(HttpRequest* req, std::string* json_result);
4950

5051
private:
5152
CloudStorageEngine& _engine;

be/src/cloud/cloud_meta_mgr.cpp

+10-5
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,7 @@ Status CloudMetaMgr::get_tablet_meta(int64_t tablet_id, TabletMetaSharedPtr* tab
385385
}
386386

387387
Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_data,
388-
bool sync_delete_bitmap) {
388+
bool sync_delete_bitmap, bool full_sync) {
389389
using namespace std::chrono;
390390

391391
TEST_SYNC_POINT_RETURN_WITH_VALUE("CloudMetaMgr::sync_tablet_rowsets", Status::OK(), tablet);
@@ -411,7 +411,11 @@ Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_
411411
idx->set_partition_id(tablet->partition_id());
412412
{
413413
std::shared_lock rlock(tablet->get_header_lock());
414-
req.set_start_version(tablet->max_version_unlocked() + 1);
414+
if (full_sync) {
415+
req.set_start_version(0);
416+
} else {
417+
req.set_start_version(tablet->max_version_unlocked() + 1);
418+
}
415419
req.set_base_compaction_cnt(tablet->base_compaction_cnt());
416420
req.set_cumulative_compaction_cnt(tablet->cumulative_compaction_cnt());
417421
req.set_cumulative_point(tablet->cumulative_layer_point());
@@ -471,7 +475,7 @@ Status CloudMetaMgr::sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_
471475
DeleteBitmap delete_bitmap(tablet_id);
472476
int64_t old_max_version = req.start_version() - 1;
473477
auto st = sync_tablet_delete_bitmap(tablet, old_max_version, resp.rowset_meta(),
474-
resp.stats(), req.idx(), &delete_bitmap);
478+
resp.stats(), req.idx(), &delete_bitmap, full_sync);
475479
if (st.is<ErrorCode::ROWSETS_EXPIRED>() && tried++ < retry_times) {
476480
LOG_WARNING("rowset meta is expired, need to retry")
477481
.tag("tablet", tablet->tablet_id())
@@ -618,12 +622,13 @@ bool CloudMetaMgr::sync_tablet_delete_bitmap_by_cache(CloudTablet* tablet, int64
618622
Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_max_version,
619623
std::ranges::range auto&& rs_metas,
620624
const TabletStatsPB& stats, const TabletIndexPB& idx,
621-
DeleteBitmap* delete_bitmap) {
625+
DeleteBitmap* delete_bitmap, bool full_sync) {
622626
if (rs_metas.empty()) {
623627
return Status::OK();
624628
}
625629

626-
if (sync_tablet_delete_bitmap_by_cache(tablet, old_max_version, rs_metas, delete_bitmap)) {
630+
if (!full_sync &&
631+
sync_tablet_delete_bitmap_by_cache(tablet, old_max_version, rs_metas, delete_bitmap)) {
627632
return Status::OK();
628633
} else {
629634
DeleteBitmapPtr new_delete_bitmap = std::make_shared<DeleteBitmap>(tablet->tablet_id());

be/src/cloud/cloud_meta_mgr.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ class CloudMetaMgr {
5858
Status get_tablet_meta(int64_t tablet_id, std::shared_ptr<TabletMeta>* tablet_meta);
5959

6060
Status sync_tablet_rowsets(CloudTablet* tablet, bool warmup_delta_data = false,
61-
bool sync_delete_bitmap = true);
61+
bool sync_delete_bitmap = true, bool full_sync = false);
6262

6363
Status prepare_rowset(const RowsetMeta& rs_meta,
6464
std::shared_ptr<RowsetMeta>* existed_rs_meta = nullptr);
@@ -116,7 +116,8 @@ class CloudMetaMgr {
116116

117117
Status sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_max_version,
118118
std::ranges::range auto&& rs_metas, const TabletStatsPB& stats,
119-
const TabletIndexPB& idx, DeleteBitmap* delete_bitmap);
119+
const TabletIndexPB& idx, DeleteBitmap* delete_bitmap,
120+
bool full_sync = false);
120121
void check_table_size_correctness(const RowsetMeta& rs_meta);
121122
int64_t get_segment_file_size(const RowsetMeta& rs_meta);
122123
int64_t get_inverted_index_file_szie(const RowsetMeta& rs_meta);

be/src/olap/tablet_meta.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -1205,9 +1205,13 @@ void DeleteBitmap::remove_stale_delete_bitmap_from_queue(const std::vector<std::
12051205
}
12061206
auto start_bmk = std::get<1>(delete_bitmap_tuple);
12071207
auto end_bmk = std::get<2>(delete_bitmap_tuple);
1208+
// the key range to be removed is [start_bmk, end_bmk),
1209+
// due to the different definitions of the right boundary,
1210+
// so use end_bmk as right boundary when removing local delete bitmap,
1211+
// use (end_bmk - 1) as right boundary when removing ms delete bitmap
12081212
remove(start_bmk, end_bmk);
12091213
to_delete.emplace_back(std::make_tuple(std::get<0>(start_bmk).to_string(), 0,
1210-
std::get<2>(end_bmk)));
1214+
std::get<2>(end_bmk) - 1));
12111215
}
12121216
_stale_delete_bitmap.erase(version_str);
12131217
}

be/src/service/http_service.cpp

+9-4
Original file line numberDiff line numberDiff line change
@@ -407,11 +407,16 @@ void HttpService::register_cloud_handler(CloudStorageEngine& engine) {
407407
TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN));
408408
_ev_http_server->register_handler(HttpMethod::GET, "/api/compaction/run_status",
409409
run_status_compaction_action);
410-
CloudDeleteBitmapAction* count_delete_bitmap_action =
411-
_pool.add(new CloudDeleteBitmapAction(DeleteBitmapActionType::COUNT_INFO, _env, engine,
410+
CloudDeleteBitmapAction* count_local_delete_bitmap_action =
411+
_pool.add(new CloudDeleteBitmapAction(DeleteBitmapActionType::COUNT_LOCAL, _env, engine,
412412
TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN));
413-
_ev_http_server->register_handler(HttpMethod::GET, "/api/delete_bitmap/count",
414-
count_delete_bitmap_action);
413+
_ev_http_server->register_handler(HttpMethod::GET, "/api/delete_bitmap/count_local",
414+
count_local_delete_bitmap_action);
415+
CloudDeleteBitmapAction* count_ms_delete_bitmap_action =
416+
_pool.add(new CloudDeleteBitmapAction(DeleteBitmapActionType::COUNT_MS, _env, engine,
417+
TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN));
418+
_ev_http_server->register_handler(HttpMethod::GET, "/api/delete_bitmap/count_ms",
419+
count_ms_delete_bitmap_action);
415420
#ifdef ENABLE_INJECTION_POINT
416421
InjectionPointAction* injection_point_action = _pool.add(new InjectionPointAction);
417422
_ev_http_server->register_handler(HttpMethod::GET, "/api/injection_point/{op}",
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,78 @@
11
-- This file is automatically generated. You should know what you did if you want to edit this
22
-- !sql --
3-
0 0 0
4-
1 8 8
3+
0 0 8
4+
1 1 1
5+
2 2 2
6+
3 3 3
7+
4 4 4
8+
5 5 5
9+
6 6 6
10+
7 7 7
11+
8 8 8
512

613
-- !sql --
7-
0 0 0
8-
1 8 8
14+
0 0 8
15+
1 1 1
16+
2 2 2
17+
3 3 3
18+
4 4 4
19+
5 5 5
20+
6 6 6
21+
7 7 7
22+
8 8 8
923

1024
-- !sql --
11-
0 0 0
25+
0 0 13
1226
1 13 13
27+
2 2 2
28+
3 3 3
29+
4 4 4
30+
5 5 5
31+
6 6 6
32+
7 7 7
33+
8 8 8
1334

1435
-- !sql --
15-
0 0 0
36+
0 0 13
1637
1 13 13
38+
2 2 2
39+
3 3 3
40+
4 4 4
41+
5 5 5
42+
6 6 6
43+
7 7 7
44+
8 8 8
1745

1846
-- !sql --
19-
0 0 0
47+
0 0 18
2048
1 23 23
49+
2 2 2
50+
3 3 3
51+
4 4 4
52+
5 5 5
53+
6 6 6
54+
7 7 7
55+
8 8 8
2156

2257
-- !sql --
23-
0 0 0
58+
0 0 18
2459
1 23 23
60+
2 2 2
61+
3 3 3
62+
4 4 4
63+
5 5 5
64+
6 6 6
65+
7 7 7
66+
8 8 8
2567

2668
-- !sql --
27-
0 0 0
69+
0 5 5
2870
1 28 28
71+
2 2 2
72+
3 3 3
73+
4 4 4
74+
5 5 5
75+
6 6 6
76+
7 7 7
77+
8 8 8
2978

0 commit comments

Comments
 (0)