Skip to content
This repository was archived by the owner on Jun 23, 2022. It is now read-only.

fix(disk): fix disk space insufficient bug #851

Merged
merged 1 commit into from
Jul 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 30 additions & 34 deletions src/common/fs_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,13 @@
namespace dsn {
namespace replication {

DSN_DEFINE_bool("replication",
enable_disk_available_space_check,
true,
"check if disk available space ratio below disk_min_available_space_ratio");
DSN_TAG_VARIABLE(enable_disk_available_space_check, FT_MUTABLE);
DSN_DEFINE_int32("replication",
disk_min_available_space_ratio,
10,
"if disk available space ratio "
"is below this value, all "
"replica on this disk will "
"reject client write");
"is below this value, this "
"disk will be considered as "
"space insufficient");
DSN_TAG_VARIABLE(disk_min_available_space_ratio, FT_MUTABLE);

unsigned dir_node::replicas_count() const
Expand Down Expand Up @@ -89,41 +84,44 @@ unsigned dir_node::remove(const gpid &pid)
return iter->second.erase(pid);
}

void dir_node::update_disk_stat(bool &status_changed)
bool dir_node::update_disk_stat(const bool update_disk_status)
{
FAIL_POINT_INJECT_F("update_disk_stat", [](string_view) {});
FAIL_POINT_INJECT_F("update_disk_stat", [](string_view) { return false; });
dsn::utils::filesystem::disk_space_info info;
if (!dsn::utils::filesystem::get_disk_space_info(full_dir, info)) {
derror_f("update disk space failed: dir = {}", full_dir);
return;
return false;
}
// update disk space info
disk_capacity_mb = info.capacity / 1024 / 1024;
disk_available_mb = info.available / 1024 / 1024;
disk_available_ratio = static_cast<int>(
disk_capacity_mb == 0 ? 0 : std::round(disk_available_mb * 100.0 / disk_capacity_mb));

if (!update_disk_status) {
ddebug_f("update disk space succeed: dir = {}, capacity_mb = {}, available_mb = {}, "
"available_ratio = {}%",
full_dir,
disk_capacity_mb,
disk_available_mb,
disk_available_ratio);
return false;
}
auto old_status = status;
auto new_status = disk_available_ratio < FLAGS_disk_min_available_space_ratio
? disk_status::SPACE_INSUFFICIENT
: disk_status::NORMAL;
if (old_status != new_status) {
status = new_status;
}
ddebug_f("update disk space succeed: dir = {}, capacity_mb = {}, available_mb = {}, "
"available_ratio = {}%",
"available_ratio = {}%, disk_status = {}",
full_dir,
disk_capacity_mb,
disk_available_mb,
disk_available_ratio);
// disk available space check
if (FLAGS_enable_disk_available_space_check) {
// update disk status
auto old_status = status;
auto new_status = disk_available_ratio < FLAGS_disk_min_available_space_ratio
? disk_status::SPACE_INSUFFICIENT
: disk_status::NORMAL;
if (old_status != new_status) {
status = new_status;
ddebug_f("disk({}) status update from({}) to({})",
full_dir,
enum_to_string(old_status),
enum_to_string(new_status));
}
status_changed = (old_status != new_status);
}
disk_available_ratio,
enum_to_string(status));
return (old_status != new_status);
}

fs_manager::fs_manager(bool for_test)
Expand Down Expand Up @@ -190,7 +188,7 @@ dsn::error_code fs_manager::initialize(const std::vector<std::string> &data_dirs
_available_data_dirs = data_dirs;

if (!for_test) {
update_disk_stat();
update_disk_stat(false);
}
return dsn::ERR_OK;
}
Expand Down Expand Up @@ -312,13 +310,11 @@ bool fs_manager::for_each_dir_node(const std::function<bool(const dir_node &)> &
return true;
}

void fs_manager::update_disk_stat()
void fs_manager::update_disk_stat(bool check_status_changed)
{
reset_disk_stat();
for (auto &dir_node : _dir_nodes) {
bool status_changed = false;
dir_node->update_disk_stat(status_changed);
if (status_changed) {
if (dir_node->update_disk_stat(check_status_changed)) {
_status_updated_dir_nodes.emplace_back(dir_node);
}
_total_capacity_mb += dir_node->disk_capacity_mb;
Expand Down
5 changes: 2 additions & 3 deletions src/common/fs_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
namespace dsn {
namespace replication {

DSN_DECLARE_bool(enable_disk_available_space_check);
DSN_DECLARE_int32(disk_min_available_space_ratio);

struct dir_node
Expand Down Expand Up @@ -64,7 +63,7 @@ struct dir_node
unsigned replicas_count() const;
bool has(const dsn::gpid &pid) const;
unsigned remove(const dsn::gpid &pid);
void update_disk_stat(bool &status_changed);
bool update_disk_stat(const bool update_disk_status);
};

class fs_manager
Expand All @@ -86,7 +85,7 @@ class fs_manager
void add_replica(const dsn::gpid &pid, const std::string &pid_dir);
void remove_replica(const dsn::gpid &pid);
bool for_each_dir_node(const std::function<bool(const dir_node &)> &func) const;
void update_disk_stat();
void update_disk_stat(bool check_status_changed = true);

const std::vector<std::string> &get_available_data_dirs() const
{
Expand Down
60 changes: 60 additions & 0 deletions src/common/test/fs_manager_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <gtest/gtest.h>
#include <dsn/utility/fail_point.h>

#include "common/fs_manager.h"

namespace dsn {
namespace replication {

TEST(fs_manager, dir_update_disk_status)
{
std::shared_ptr<dir_node> node = std::make_shared<dir_node>("tag", "path");
struct update_disk_status
{
bool update_status;
bool mock_insufficient;
disk_status::type old_disk_status;
disk_status::type new_disk_status;
bool expected_ret;
} tests[] = {
{false, false, disk_status::NORMAL, disk_status::NORMAL, false},
{false, true, disk_status::NORMAL, disk_status::NORMAL, false},
{true, false, disk_status::NORMAL, disk_status::NORMAL, false},
{true, false, disk_status::SPACE_INSUFFICIENT, disk_status::NORMAL, true},
{true, true, disk_status::NORMAL, disk_status::SPACE_INSUFFICIENT, true},
{true, true, disk_status::SPACE_INSUFFICIENT, disk_status::SPACE_INSUFFICIENT, false}};
for (const auto &test : tests) {
node->status = test.old_disk_status;
fail::setup();
if (test.mock_insufficient) {
fail::cfg("filesystem_get_disk_space_info", "return(insufficient)");
} else {
fail::cfg("filesystem_get_disk_space_info", "return(normal)");
}
ASSERT_EQ(test.expected_ret, node->update_disk_stat(test.update_status));
ASSERT_EQ(test.new_disk_status, node->status);
fail::teardown();
}
}

} // namespace replication
} // namespace dsn
4 changes: 3 additions & 1 deletion src/replica/replica.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ const char *manual_compaction_status_to_string(manual_compaction_status status);
} \
}

DSN_DECLARE_bool(reject_write_when_disk_insufficient);

class replica : public serverlet<replica>, public ref_counter, public replica_base
{
public:
Expand Down Expand Up @@ -571,7 +573,7 @@ class replica : public serverlet<replica>, public ref_counter, public replica_ba

std::unique_ptr<security::access_controller> _access_controller;

disk_status::type _disk_status;
disk_status::type _disk_status{disk_status::NORMAL};
};
typedef dsn::ref_ptr<replica> replica_ptr;
} // namespace replication
Expand Down
9 changes: 8 additions & 1 deletion src/replica/replica_2pc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@
namespace dsn {
namespace replication {

DSN_DEFINE_bool("replication",
reject_write_when_disk_insufficient,
true,
"reject client write requests if disk status is space insufficient");
DSN_TAG_VARIABLE(reject_write_when_disk_insufficient, FT_MUTABLE);

void replica::on_client_write(dsn::message_ex *request, bool ignore_throttling)
{
_checker.only_one_thread_access();
Expand Down Expand Up @@ -85,7 +91,8 @@ void replica::on_client_write(dsn::message_ex *request, bool ignore_throttling)
return;
}

if (disk_space_insufficient() || _primary_states.secondary_disk_space_insufficient()) {
if (FLAGS_reject_write_when_disk_insufficient &&
(disk_space_insufficient() || _primary_states.secondary_disk_space_insufficient())) {
response_client_write(request, ERR_DISK_INSUFFICIENT);
return;
}
Expand Down
9 changes: 9 additions & 0 deletions src/utils/filesystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,15 @@ error_code get_process_image_path(int pid, std::string &path)

bool get_disk_space_info(const std::string &path, disk_space_info &info)
{
FAIL_POINT_INJECT_F("filesystem_get_disk_space_info", [&info](string_view str) {
info.capacity = 100 * 1024 * 1024;
if (str.find("insufficient") != string_view::npos) {
info.available = 5 * 1024 * 1024;
} else {
info.available = 50 * 1024 * 1024;
}
return true;
});

boost::system::error_code ec;
boost::filesystem::space_info in = boost::filesystem::space(path, ec);
Expand Down