Skip to content

Commit eba34e3

Browse files
authored
[fix](cloud-mow) MS should delete the existing keys before rewriting them when processing old version delete bitmap on cu compaction (#42379)
PR #40204 added support for deleting old version delete bitmaps when doing cu compaction: it first updates the delete bitmap with the aggregated result, then deletes the old version delete bitmaps. Updating the delete bitmap means overwriting existing keys. However, a delete bitmap may be split into multiple KVs when stored in fdb, so overwriting directly can leave KVs from a previous, larger value behind. We should therefore delete the existing keys and rewrite them to fdb instead of overwriting them directly.
1 parent d746146 commit eba34e3

7 files changed

+126
-39
lines changed

be/src/cloud/cloud_cumulative_compaction.cpp

+17-28
Original file line numberDiff line numberDiff line change
@@ -363,12 +363,12 @@ Status CloudCumulativeCompaction::modify_rowsets() {
363363
if (config::enable_delete_bitmap_merge_on_compaction &&
364364
_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
365365
_tablet->enable_unique_key_merge_on_write() && _input_rowsets.size() != 1) {
366-
process_old_version_delete_bitmap();
366+
RETURN_IF_ERROR(process_old_version_delete_bitmap());
367367
}
368368
return Status::OK();
369369
}
370370

371-
void CloudCumulativeCompaction::process_old_version_delete_bitmap() {
371+
Status CloudCumulativeCompaction::process_old_version_delete_bitmap() {
372372
// aggregate the old version delete bitmaps of the previous rowsets
373373
std::vector<RowsetSharedPtr> pre_rowsets {};
374374
std::vector<std::string> pre_rowset_ids {};
@@ -407,40 +407,29 @@ void CloudCumulativeCompaction::process_old_version_delete_bitmap() {
407407
}
408408
if (!new_delete_bitmap->empty()) {
409409
// store agg delete bitmap
410-
Status update_st;
411410
DBUG_EXECUTE_IF("CloudCumulativeCompaction.modify_rowsets.update_delete_bitmap_failed",
412411
{
413-
update_st = Status::InternalError(
412+
return Status::InternalError(
414413
"test fail to update delete bitmap for tablet_id {}",
415414
cloud_tablet()->tablet_id());
416415
});
417-
if (update_st.ok()) {
418-
update_st = _engine.meta_mgr().update_delete_bitmap_without_lock(
419-
*cloud_tablet(), new_delete_bitmap.get());
420-
}
421-
if (!update_st.ok()) {
422-
std::stringstream ss;
423-
ss << "failed to update delete bitmap for tablet=" << cloud_tablet()->tablet_id()
424-
<< " st=" << update_st.to_string();
425-
std::string msg = ss.str();
426-
LOG(WARNING) << msg;
427-
} else {
428-
Version version(_input_rowsets.front()->start_version(),
429-
_input_rowsets.back()->end_version());
430-
for (auto it = new_delete_bitmap->delete_bitmap.begin();
431-
it != new_delete_bitmap->delete_bitmap.end(); it++) {
432-
_tablet->tablet_meta()->delete_bitmap().set(it->first, it->second);
433-
}
434-
_tablet->tablet_meta()->delete_bitmap().add_to_remove_queue(version.to_string(),
435-
to_remove_vec);
436-
DBUG_EXECUTE_IF(
437-
"CloudCumulativeCompaction.modify_rowsets.delete_expired_stale_rowsets", {
438-
static_cast<CloudTablet*>(_tablet.get())
439-
->delete_expired_stale_rowsets();
440-
});
416+
RETURN_IF_ERROR(_engine.meta_mgr().cloud_update_delete_bitmap_without_lock(
417+
*cloud_tablet(), new_delete_bitmap.get()));
418+
419+
Version version(_input_rowsets.front()->start_version(),
420+
_input_rowsets.back()->end_version());
421+
for (auto it = new_delete_bitmap->delete_bitmap.begin();
422+
it != new_delete_bitmap->delete_bitmap.end(); it++) {
423+
_tablet->tablet_meta()->delete_bitmap().set(it->first, it->second);
441424
}
425+
_tablet->tablet_meta()->delete_bitmap().add_to_remove_queue(version.to_string(),
426+
to_remove_vec);
427+
DBUG_EXECUTE_IF(
428+
"CloudCumulativeCompaction.modify_rowsets.delete_expired_stale_rowsets",
429+
{ static_cast<CloudTablet*>(_tablet.get())->delete_expired_stale_rowsets(); });
442430
}
443431
}
432+
return Status::OK();
444433
}
445434

446435
void CloudCumulativeCompaction::garbage_collection() {

be/src/cloud/cloud_cumulative_compaction.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class CloudCumulativeCompaction : public CloudCompactionMixin {
4747

4848
void update_cumulative_point();
4949

50-
void process_old_version_delete_bitmap();
50+
Status process_old_version_delete_bitmap();
5151

5252
ReaderType compaction_type() const override { return ReaderType::READER_CUMULATIVE_COMPACTION; }
5353

be/src/cloud/cloud_delete_bitmap_action.cpp

-2
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,6 @@ Status CloudDeleteBitmapAction::_handle_show_delete_bitmap_count(HttpRequest* re
9595
auto count = tablet->tablet_meta()->delete_bitmap().get_delete_bitmap_count();
9696
auto cardinality = tablet->tablet_meta()->delete_bitmap().cardinality();
9797
auto size = tablet->tablet_meta()->delete_bitmap().get_size();
98-
LOG(INFO) << "show_delete_bitmap_count,tablet_id=" << tablet_id << ",count=" << count
99-
<< ",cardinality=" << cardinality << ",size=" << size;
10098

10199
rapidjson::Document root;
102100
root.SetObject();

be/src/cloud/cloud_meta_mgr.cpp

+6-5
Original file line numberDiff line numberDiff line change
@@ -714,8 +714,9 @@ Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_
714714
for (size_t i = 0; i < rowset_ids.size(); i++) {
715715
RowsetId rst_id;
716716
rst_id.init(rowset_ids[i]);
717-
delete_bitmap->merge({rst_id, segment_ids[i], vers[i]},
718-
roaring::Roaring::read(delete_bitmaps[i].data()));
717+
delete_bitmap->merge(
718+
{rst_id, segment_ids[i], vers[i]},
719+
roaring::Roaring::readSafe(delete_bitmaps[i].data(), delete_bitmaps[i].length()));
719720
}
720721
int64_t latency = cntl.latency_us();
721722
if (latency > 100 * 1000) { // 100ms
@@ -1068,9 +1069,9 @@ Status CloudMetaMgr::update_delete_bitmap(const CloudTablet& tablet, int64_t loc
10681069
return st;
10691070
}
10701071

1071-
Status CloudMetaMgr::update_delete_bitmap_without_lock(const CloudTablet& tablet,
1072-
DeleteBitmap* delete_bitmap) {
1073-
LOG(INFO) << "update_delete_bitmap_without_lock , tablet_id: " << tablet.tablet_id()
1072+
Status CloudMetaMgr::cloud_update_delete_bitmap_without_lock(const CloudTablet& tablet,
1073+
DeleteBitmap* delete_bitmap) {
1074+
LOG(INFO) << "cloud_update_delete_bitmap_without_lock , tablet_id: " << tablet.tablet_id()
10741075
<< ",delete_bitmap size:" << delete_bitmap->delete_bitmap.size();
10751076
UpdateDeleteBitmapRequest req;
10761077
UpdateDeleteBitmapResponse res;

be/src/cloud/cloud_meta_mgr.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ class CloudMetaMgr {
9595
Status update_delete_bitmap(const CloudTablet& tablet, int64_t lock_id, int64_t initiator,
9696
DeleteBitmap* delete_bitmap);
9797

98-
Status update_delete_bitmap_without_lock(const CloudTablet& tablet,
99-
DeleteBitmap* delete_bitmap);
98+
Status cloud_update_delete_bitmap_without_lock(const CloudTablet& tablet,
99+
DeleteBitmap* delete_bitmap);
100100

101101
Status get_delete_bitmap_update_lock(const CloudTablet& tablet, int64_t lock_id,
102102
int64_t initiator);

cloud/src/meta-service/meta_service.cpp

+12-1
Original file line numberDiff line numberDiff line change
@@ -1782,6 +1782,7 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
17821782
// lock_id > 0 : load
17831783
// lock_id = -1 : compaction
17841784
// lock_id = -2 : schema change
1785+
// lock_id = -3 : compaction update delete bitmap without lock
17851786
if (request->lock_id() > 0) {
17861787
std::string pending_val;
17871788
if (!delete_bitmap_keys.SerializeToString(&pending_val)) {
@@ -1794,6 +1795,15 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
17941795
fdb_txn_size = fdb_txn_size + pending_key.size() + pending_val.size();
17951796
LOG(INFO) << "xxx update delete bitmap put pending_key=" << hex(pending_key)
17961797
<< " lock_id=" << request->lock_id() << " value_size: " << pending_val.size();
1798+
} else if (request->lock_id() == -3) {
1799+
// delete existing key
1800+
for (size_t i = 0; i < request->rowset_ids_size(); ++i) {
1801+
auto& start_key = delete_bitmap_keys.delete_bitmap_keys(i);
1802+
std::string end_key {start_key};
1803+
encode_int64(INT64_MAX, &end_key);
1804+
txn->remove(start_key, end_key);
1805+
LOG(INFO) << "xxx remove existing key=" << hex(start_key) << " tablet_id=" << tablet_id;
1806+
}
17971807
}
17981808

17991809
// 4. Update delete bitmap for current txn
@@ -1838,7 +1848,8 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
18381848
total_key++;
18391849
total_size += key.size() + val.size();
18401850
VLOG_DEBUG << "xxx update delete bitmap put delete_bitmap_key=" << hex(key)
1841-
<< " lock_id=" << request->lock_id() << " value_size: " << val.size();
1851+
<< " lock_id=" << request->lock_id() << " key_size: " << key.size()
1852+
<< " value_size: " << val.size();
18421853
}
18431854

18441855
err = txn->commit();

cloud/test/meta_service_test.cpp

+88
Original file line numberDiff line numberDiff line change
@@ -4768,6 +4768,94 @@ TEST(MetaServiceTest, UpdateDeleteBitmap) {
47684768
ASSERT_EQ(get_delete_bitmap_res.versions(100), 3);
47694769
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(100), "abcd4");
47704770
}
4771+
4772+
// update existing delete bitmap key
4773+
{
4774+
//first update new key
4775+
UpdateDeleteBitmapRequest update_delete_bitmap_req;
4776+
UpdateDeleteBitmapResponse update_delete_bitmap_res;
4777+
update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
4778+
update_delete_bitmap_req.set_table_id(112);
4779+
update_delete_bitmap_req.set_partition_id(123);
4780+
update_delete_bitmap_req.set_lock_id(888);
4781+
update_delete_bitmap_req.set_initiator(-1);
4782+
update_delete_bitmap_req.set_tablet_id(333);
4783+
std::string large_value = generate_random_string(300 * 1000 * 3);
4784+
update_delete_bitmap_req.add_rowset_ids("456");
4785+
update_delete_bitmap_req.add_segment_ids(0);
4786+
update_delete_bitmap_req.add_versions(2);
4787+
update_delete_bitmap_req.add_segment_delete_bitmaps(large_value);
4788+
meta_service->update_delete_bitmap(
4789+
reinterpret_cast<google::protobuf::RpcController*>(&cntl),
4790+
&update_delete_bitmap_req, &update_delete_bitmap_res, nullptr);
4791+
ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK);
4792+
4793+
GetDeleteBitmapRequest get_delete_bitmap_req;
4794+
GetDeleteBitmapResponse get_delete_bitmap_res;
4795+
get_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
4796+
get_delete_bitmap_req.set_tablet_id(333);
4797+
4798+
get_delete_bitmap_req.add_rowset_ids("456");
4799+
get_delete_bitmap_req.add_begin_versions(2);
4800+
get_delete_bitmap_req.add_end_versions(2);
4801+
4802+
meta_service->get_delete_bitmap(reinterpret_cast<google::protobuf::RpcController*>(&cntl),
4803+
&get_delete_bitmap_req, &get_delete_bitmap_res, nullptr);
4804+
ASSERT_EQ(get_delete_bitmap_res.status().code(), MetaServiceCode::OK);
4805+
ASSERT_EQ(get_delete_bitmap_res.rowset_ids_size(), 1);
4806+
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1);
4807+
ASSERT_EQ(get_delete_bitmap_res.versions_size(), 1);
4808+
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1);
4809+
4810+
ASSERT_EQ(get_delete_bitmap_res.rowset_ids(0), "456");
4811+
ASSERT_EQ(get_delete_bitmap_res.segment_ids(0), 0);
4812+
ASSERT_EQ(get_delete_bitmap_res.versions(0), 2);
4813+
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(0), large_value);
4814+
}
4815+
4816+
{
4817+
//compaction update delete bitmap without lock
4818+
UpdateDeleteBitmapRequest update_delete_bitmap_req;
4819+
UpdateDeleteBitmapResponse update_delete_bitmap_res;
4820+
update_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
4821+
update_delete_bitmap_req.set_table_id(112);
4822+
update_delete_bitmap_req.set_partition_id(123);
4823+
update_delete_bitmap_req.set_unlock(true);
4824+
update_delete_bitmap_req.set_lock_id(-3);
4825+
update_delete_bitmap_req.set_initiator(-1);
4826+
update_delete_bitmap_req.set_tablet_id(333);
4827+
std::string large_value = generate_random_string(300 * 1000);
4828+
update_delete_bitmap_req.add_rowset_ids("456");
4829+
update_delete_bitmap_req.add_segment_ids(0);
4830+
update_delete_bitmap_req.add_versions(2);
4831+
update_delete_bitmap_req.add_segment_delete_bitmaps(large_value);
4832+
meta_service->update_delete_bitmap(
4833+
reinterpret_cast<google::protobuf::RpcController*>(&cntl),
4834+
&update_delete_bitmap_req, &update_delete_bitmap_res, nullptr);
4835+
ASSERT_EQ(update_delete_bitmap_res.status().code(), MetaServiceCode::OK);
4836+
4837+
GetDeleteBitmapRequest get_delete_bitmap_req;
4838+
GetDeleteBitmapResponse get_delete_bitmap_res;
4839+
get_delete_bitmap_req.set_cloud_unique_id("test_cloud_unique_id");
4840+
get_delete_bitmap_req.set_tablet_id(333);
4841+
4842+
get_delete_bitmap_req.add_rowset_ids("456");
4843+
get_delete_bitmap_req.add_begin_versions(2);
4844+
get_delete_bitmap_req.add_end_versions(2);
4845+
4846+
meta_service->get_delete_bitmap(reinterpret_cast<google::protobuf::RpcController*>(&cntl),
4847+
&get_delete_bitmap_req, &get_delete_bitmap_res, nullptr);
4848+
ASSERT_EQ(get_delete_bitmap_res.status().code(), MetaServiceCode::OK);
4849+
ASSERT_EQ(get_delete_bitmap_res.rowset_ids_size(), 1);
4850+
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1);
4851+
ASSERT_EQ(get_delete_bitmap_res.versions_size(), 1);
4852+
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps_size(), 1);
4853+
4854+
ASSERT_EQ(get_delete_bitmap_res.rowset_ids(0), "456");
4855+
ASSERT_EQ(get_delete_bitmap_res.segment_ids(0), 0);
4856+
ASSERT_EQ(get_delete_bitmap_res.versions(0), 2);
4857+
ASSERT_EQ(get_delete_bitmap_res.segment_delete_bitmaps(0), large_value);
4858+
}
47714859
}
47724860

47734861
TEST(MetaServiceTest, UpdateDeleteBitmapWithException) {

0 commit comments

Comments
 (0)