Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-12751 dfuse: Improve evict command. #12633

Merged
merged 5 commits into from
Jul 23, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions src/client/dfuse/dfuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ struct dfuse_info {
*/
pthread_spinlock_t di_lock;

/* RW lock used to make the filesystem query ioctl block until pending forget calls complete. */
pthread_rwlock_t di_forget_lock;

/** Hash table of open inodes, this matches kernel ref counts */
struct d_hash_table dpi_iet;
/** Hash table of open pools */
Expand Down Expand Up @@ -791,6 +794,24 @@ struct dfuse_inode_entry {
bool ie_unlinked;
};

/**
 * Look up the inode entry for \a ino in the dfuse_info->dpi_iet hash table.
 *
 * \param[in] dfuse_info	Filesystem handle owning the inode hash table.
 * \param[in] ino		Inode number, used as the hash key.
 *
 * \return	The matching inode entry, or NULL if the table has no record for
 *		\a ino.
 *
 * NOTE(review): callers in this change pair a successful lookup with
 * dfuse_inode_decref(), and the forget path notes that d_hash_rec_find()
 * acquires a reference - confirm against the hash table API.
 */
static inline struct dfuse_inode_entry *
dfuse_inode_lookup(struct dfuse_info *dfuse_info, fuse_ino_t ino)
{
	d_list_t *link = d_hash_rec_find(&dfuse_info->dpi_iet, &ino, sizeof(ino));

	/* Map the embedded hash link back to its containing inode entry. */
	return link ? container_of(link, struct dfuse_inode_entry, ie_htl) : NULL;
}

/**
 * Drop one reference on an inode entry via the dfuse_info->dpi_iet hash table.
 *
 * \param[in] dfuse_info	Filesystem handle owning the inode hash table.
 * \param[in] ie		Inode entry whose hash record is released.
 */
static inline void
dfuse_inode_decref(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie)
{
	struct d_hash_table *inodes = &dfuse_info->dpi_iet;

	d_hash_rec_decref(inodes, &ie->ie_htl);
}

extern char *duns_xattr_name;

/* Generate the inode to use for this dfs object. This is generating a single
Expand Down
4 changes: 4 additions & 0 deletions src/client/dfuse/dfuse_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1042,6 +1042,8 @@ dfuse_fs_init(struct dfuse_info *fs_handle)

D_SPIN_INIT(&fs_handle->di_lock, 0);

D_RWLOCK_INIT(&fs_handle->di_forget_lock, 0);

for (i = 0; i < fs_handle->di_eq_count; i++) {
struct dfuse_eq *eqt = &fs_handle->di_eqt[i];

Expand Down Expand Up @@ -1071,6 +1073,7 @@ dfuse_fs_init(struct dfuse_info *fs_handle)

err_eq:
D_SPIN_DESTROY(&fs_handle->di_lock);
D_RWLOCK_DESTROY(&fs_handle->di_forget_lock);

for (i = 0; i < fs_handle->di_eq_count; i++) {
struct dfuse_eq *eqt = &fs_handle->di_eqt[i];
Expand Down Expand Up @@ -1529,6 +1532,7 @@ dfuse_fs_fini(struct dfuse_info *dfuse_info)
int i;

D_SPIN_DESTROY(&dfuse_info->di_lock);
D_RWLOCK_DESTROY(&dfuse_info->di_forget_lock);

for (i = 0; i < dfuse_info->di_eq_count; i++) {
struct dfuse_eq *eqt = &dfuse_info->di_eqt[i];
Expand Down
53 changes: 23 additions & 30 deletions src/client/dfuse/ops/forget.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2016-2022 Intel Corporation.
* (C) Copyright 2016-2023 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand All @@ -8,58 +8,51 @@
#include "dfuse.h"

static void
dfuse_forget_one(struct dfuse_projection_info *fs_handle,
fuse_ino_t ino, uintptr_t nlookup)
dfuse_forget_one(struct dfuse_info *dfuse_info, fuse_ino_t ino, uintptr_t nlookup)
{
d_list_t *rlink;
int rc;
struct dfuse_inode_entry *ie;
int rc;

/* One additional reference is needed because the rec_find() itself
* acquires one
*/
/* One additional reference is needed because the rec_find() itself acquires one */
nlookup++;

rlink = d_hash_rec_find(&fs_handle->dpi_iet, &ino, sizeof(ino));
if (!rlink) {
DFUSE_TRA_WARNING(fs_handle, "Unable to find ref for %#lx %lu",
ino, nlookup);
ie = dfuse_inode_lookup(dfuse_info, ino);
if (!ie) {
DFUSE_TRA_WARNING(dfuse_info, "Unable to find ref for %#lx %lu", ino, nlookup);
return;
}

DFUSE_TRA_DEBUG(container_of(rlink, struct dfuse_inode_entry, ie_htl),
"inode %#lx count %lu",
ino, nlookup);
DFUSE_TRA_DEBUG(ie, "inode %#lx count %lu", ino, nlookup);

rc = d_hash_rec_ndecref(&fs_handle->dpi_iet, nlookup, rlink);
if (rc != -DER_SUCCESS) {
DFUSE_TRA_ERROR(fs_handle, "Invalid refcount %lu on %p",
nlookup,
container_of(rlink, struct dfuse_inode_entry,
ie_htl));
}
rc = d_hash_rec_ndecref(&dfuse_info->dpi_iet, nlookup, &ie->ie_htl);
if (rc != -DER_SUCCESS)
DFUSE_TRA_ERROR(dfuse_info, "Invalid refcount %lu on %p", nlookup, ie);
}

void
dfuse_cb_forget(fuse_req_t req, fuse_ino_t ino, uintptr_t nlookup)
{
struct dfuse_projection_info *fs_handle = fuse_req_userdata(req);
struct dfuse_info *dfuse_info = fuse_req_userdata(req);

fuse_reply_none(req);

dfuse_forget_one(fs_handle, ino, nlookup);
dfuse_forget_one(dfuse_info, ino, nlookup);
}

void
dfuse_cb_forget_multi(fuse_req_t req, size_t count,
struct fuse_forget_data *forgets)
dfuse_cb_forget_multi(fuse_req_t req, size_t count, struct fuse_forget_data *forgets)
{
struct dfuse_projection_info *fs_handle = fuse_req_userdata(req);
int i;
struct dfuse_info *dfuse_info = fuse_req_userdata(req);
int i;

fuse_reply_none(req);

DFUSE_TRA_DEBUG(fs_handle, "Forgetting %zi", count);
DFUSE_TRA_DEBUG(dfuse_info, "Forgetting %zi", count);

D_RWLOCK_RDLOCK(&dfuse_info->di_forget_lock);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

D_RWLOCK_RDLOCK, not D_RWLOCK_WRLOCK?
Is this because d_hash_rec_ndecref has its own lock?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want forgets to happen in parallel as they do now, so forget takes a reader lock (although it does modify state, this is protected by the hash table), but I also want the ioctl to block whilst there are pending forget operations, so that takes a writer lock. It's the only way I could think of for the ioctl to pause while pending operations are happening. The kernel seems to send a small number of forgetmany operations, each with a large number of forgets, so this won't guarantee that the kernel has completed, but it does at least pause the reader for any in-flight operations to complete.


for (i = 0; i < count; i++)
dfuse_forget_one(fs_handle, forgets[i].ino, forgets[i].nlookup);
dfuse_forget_one(dfuse_info, forgets[i].ino, forgets[i].nlookup);

D_RWLOCK_UNLOCK(&dfuse_info->di_forget_lock);
}
15 changes: 8 additions & 7 deletions src/client/dfuse/ops/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -304,14 +304,16 @@ handle_cont_qe_ioctl_helper(fuse_req_t req, const struct dfuse_mem_query *in_que
struct dfuse_mem_query query = {};

if (in_query && in_query->ino) {
d_list_t *rlink;
struct dfuse_inode_entry *ie;

rlink =
d_hash_rec_find(&dfuse_info->dpi_iet, &in_query->ino, sizeof(in_query->ino));
if (rlink) {
D_RWLOCK_WRLOCK(&dfuse_info->di_forget_lock);

ie = dfuse_inode_lookup(dfuse_info, in_query->ino);
if (ie) {
query.found = true;
d_hash_rec_decref(&dfuse_info->dpi_iet, rlink);
dfuse_inode_decref(dfuse_info, ie);
}
D_RWLOCK_UNLOCK(&dfuse_info->di_forget_lock);
}

query.inode_count = atomic_load_relaxed(&dfuse_info->di_inode_count);
Expand All @@ -326,11 +328,10 @@ static void
handle_cont_query_ioctl(fuse_req_t req, const void *in_buf, size_t in_bufsz)
{
struct dfuse_info *dfuse_info = fuse_req_userdata(req);
struct dfuse_mem_query query = {};
const struct dfuse_mem_query *in_query = in_buf;
int rc;

if (in_bufsz != sizeof(query))
if (in_bufsz != sizeof(struct dfuse_mem_query))
D_GOTO(err, rc = EIO);

handle_cont_qe_ioctl_helper(req, in_query);
Expand Down
4 changes: 2 additions & 2 deletions src/control/cmd/daos/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -499,9 +499,9 @@ func (cmd *fsDfuseQueryCmd) Execute(_ []string) error {
cmd.Infof(" Open files: %d", ap.dfuse_mem.fh_count)
if cmd.Ino != 0 {
if ap.dfuse_mem.found {
cmd.Infof(" Inode %#lx resident", cmd.Ino)
cmd.Infof(" Inode %d resident", cmd.Ino)
} else {
cmd.Infof(" Inode %#lx not resident", cmd.Ino)
cmd.Infof(" Inode %d not resident", cmd.Ino)
}
}

Expand Down
3 changes: 3 additions & 0 deletions src/tests/ftest/dfuse/daos_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ def run_build_test(self, cache_mode, intercept=False, dfuse_namespace=None):
'python3 -m pip install pip --upgrade',
'python3 -m pip install -r {}/requirements.txt'.format(build_dir),
'scons -C {} --jobs {} --build-deps=only'.format(build_dir, build_jobs),
'daos filesystem query {}'.format(mount_dir),
'daos filesystem evict {}'.format(build_dir),
'daos filesystem query {}'.format(mount_dir),
Comment on lines +206 to +208
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a fix or a workaround?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neither, this is to see how long the forget takes and see how effective it is. Plus check that it doesn't deadlock of course.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First results are in: all 3 commands took less than 0.00 seconds, so the evict is fast. The first two commands reported 45065 inodes in memory, the last one 4951. I don't think this test should be putting anything other than the build_dir, so it would be interesting to see if the inode count does drop to 1. If we could run the command in json mode remotely and get the evicted inode number then we could wait for it, but I think that's outside the scope of this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All tests had exactly the same values which shouldn't be a surprise, the files that aren't being evicted are the venv files which makes sense.

'scons -C {} --jobs {}'.format(build_dir, intercept_jobs)]
for cmd in cmds:
command = '{};{}'.format(preload_cmd, cmd)
Expand Down
19 changes: 14 additions & 5 deletions src/utils/daos_hdlr.c
Original file line number Diff line number Diff line change
Expand Up @@ -2441,7 +2441,8 @@ dfuse_count_query(struct cmd_args_s *ap)
fd = open(ap->path, O_NOFOLLOW, O_RDONLY);
if (fd < 0) {
rc = errno;
DH_PERROR_SYS(ap, rc, "Failed to open path");
if (rc != ENOENT)
DH_PERROR_SYS(ap, rc, "Failed to open path");
return daos_errno2der(rc);
}

Expand All @@ -2450,8 +2451,12 @@ dfuse_count_query(struct cmd_args_s *ap)
rc = ioctl(fd, DFUSE_IOCTL_COUNT_QUERY, &query);
if (rc < 0) {
rc = errno;
DH_PERROR_SYS(ap, rc, "ioctl failed");
rc = daos_errno2der(errno);
if (rc == ENOTTY) {
rc = -DER_NOTAPPLICABLE;
} else {
DH_PERROR_SYS(ap, rc, "ioctl failed");
rc = daos_errno2der(errno);
}
goto close;
}

Expand Down Expand Up @@ -2542,8 +2547,12 @@ dfuse_evict(struct cmd_args_s *ap)
rc = ioctl(fd, DFUSE_IOCTL_DFUSE_EVICT, &query);
if (rc < 0) {
rc = errno;
DH_PERROR_SYS(ap, rc, "ioctl failed");
rc = daos_errno2der(errno);
if (rc == ENOTTY) {
rc = -DER_NOTAPPLICABLE;
} else {
DH_PERROR_SYS(ap, rc, "ioctl failed");
rc = daos_errno2der(errno);
}
goto close;
}

Expand Down
2 changes: 1 addition & 1 deletion utils/node_local_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5424,7 +5424,7 @@ def get_cmd(cont_id):
os.symlink('broken', join(sub_dir, 'broken_s'))
os.symlink('file.0', join(sub_dir, 'link'))

rc = run_daos_cmd(conf, ['filesystem', 'copy', '--src', src_dir,
rc = run_daos_cmd(conf, ['filesystem', 'copy', '--src', sub_dir,
'--dst', f'daos://{pool.id()}/aft_base'])
assert rc.returncode == 0, rc

Expand Down