Re: race between vfs_rename and do_linkat (mv and link)

From: Miklos Szeredi
Date: Wed Feb 16 2022 - 05:28:53 EST


On Wed, Feb 16, 2022 at 10:28:20AM +0100, Miklos Szeredi wrote:

> So this is a fairly special situation. How about adding a new rwsem
> (which could be either global or per-filesystem)?
>
> - acquired for read in lock_rename(), before taking the inode locks
> - acquired for write in do_linkat(), before taking the inode locks, but only on retry

Something like this:

diff --git a/fs/namei.c b/fs/namei.c
index 3f1829b3ab5b..dd6908cee49d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -122,6 +122,8 @@
* PATH_MAX includes the nul terminator --RR.
*/

+static DECLARE_RWSEM(link_rwsem);
+
#define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname))

struct filename *
@@ -2961,6 +2963,8 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
{
struct dentry *p;

+ down_read(&link_rwsem);
+
if (p1 == p2) {
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
return NULL;
@@ -2995,6 +2999,8 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
inode_unlock(p2->d_inode);
mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
}
+
+ up_read(&link_rwsem);
}
EXPORT_SYMBOL(unlock_rename);

@@ -4456,6 +4462,7 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
struct path old_path, new_path;
struct inode *delegated_inode = NULL;
int how = 0;
+ bool lock = false;
int error;

if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) {
@@ -4474,10 +4481,13 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,

if (flags & AT_SYMLINK_FOLLOW)
how |= LOOKUP_FOLLOW;
+retry_lock:
+ if (lock)
+ down_write(&link_rwsem);
retry:
error = filename_lookup(olddfd, old, how, &old_path, NULL);
if (error)
- goto out_putnames;
+ goto out_unlock_link;

new_dentry = filename_create(newdfd, new, &new_path,
(how & LOOKUP_REVAL));
@@ -4511,8 +4521,16 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
how |= LOOKUP_REVAL;
goto retry;
}
+ if (!lock && error == -ENOENT) {
+ path_put(&old_path);
+ lock = true;
+ goto retry_lock;
+ }
out_putpath:
path_put(&old_path);
+out_unlock_link:
+ if (lock)
+ up_write(&link_rwsem);
out_putnames:
putname(old);
putname(new);