Re: [PATCH] ceph: Convert from atomic_t to refcount_t on ceph_snap_realm->nref

From: Jeff Layton
Date: Sat Jul 17 2021 - 07:21:57 EST


On Sat, 2021-07-17 at 18:06 +0800, Xiyu Yang wrote:
> refcount_t type and corresponding API can protect refcounters from
> accidental underflow and overflow and further use-after-free situations.
>
> Signed-off-by: Xiyu Yang <xiyuyang19@xxxxxxxxxxxx>
> Signed-off-by: Xin Tan <tanxin.ctf@xxxxxxxxx>
> ---
> fs/ceph/snap.c | 15 ++++++++-------
> fs/ceph/super.h | 3 ++-
> 2 files changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
> index 4ac0606dcbd4..d4ec9c5118bd 100644
> --- a/fs/ceph/snap.c
> +++ b/fs/ceph/snap.c
> @@ -68,14 +68,15 @@ void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
> lockdep_assert_held(&mdsc->snap_rwsem);
>
> dout("get_realm %p %d -> %d\n", realm,
> - atomic_read(&realm->nref), atomic_read(&realm->nref)+1);
> + refcount_read(&realm->nref), refcount_read(&realm->nref)+1);
> /*
> * since we _only_ increment realm refs or empty the empty
> * list with snap_rwsem held, adjusting the empty list here is
> * safe. we do need to protect against concurrent empty list
> * additions, however.
> */
> - if (atomic_inc_return(&realm->nref) == 1) {
> + refcount_inc(&realm->nref);
> + if (refcount_read(&realm->nref) == 1) {

The above is potentially racy as you've turned a single atomic operation
into two. Another task could come in and increment or decrement
realm->nref just after your refcount_inc but before the refcount_read,
and then the read would show the wrong result.

FWIW, Yejune Deng (cc'ed) proposed a very similar patch a few months ago
that caused this regression:

https://tracker.ceph.com/issues/50281

> spin_lock(&mdsc->snap_empty_lock);
> list_del_init(&realm->empty_item);
> spin_unlock(&mdsc->snap_empty_lock);
> @@ -121,7 +122,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
> if (!realm)
> return ERR_PTR(-ENOMEM);
>
> - atomic_set(&realm->nref, 1); /* for caller */
> + refcount_set(&realm->nref, 1); /* for caller */
> realm->ino = ino;
> INIT_LIST_HEAD(&realm->children);
> INIT_LIST_HEAD(&realm->child_item);
> @@ -209,8 +210,8 @@ static void __put_snap_realm(struct ceph_mds_client *mdsc,
> lockdep_assert_held_write(&mdsc->snap_rwsem);
>
> dout("__put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
> - atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
> - if (atomic_dec_and_test(&realm->nref))
> + refcount_read(&realm->nref), refcount_read(&realm->nref)-1);
> + if (refcount_dec_and_test(&realm->nref))
> __destroy_snap_realm(mdsc, realm);
> }
>
> @@ -221,8 +222,8 @@ void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
> struct ceph_snap_realm *realm)
> {
> dout("put_snap_realm %llx %p %d -> %d\n", realm->ino, realm,
> - atomic_read(&realm->nref), atomic_read(&realm->nref)-1);
> - if (!atomic_dec_and_test(&realm->nref))
> + refcount_read(&realm->nref), refcount_read(&realm->nref)-1);
> + if (!refcount_dec_and_test(&realm->nref))
> return;
>
> if (down_write_trylock(&mdsc->snap_rwsem)) {
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 6b6332a5c113..3abb00d7a0eb 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -2,6 +2,7 @@
> #ifndef _FS_CEPH_SUPER_H
> #define _FS_CEPH_SUPER_H
>
> +#include <linux/refcount.h>
> #include <linux/ceph/ceph_debug.h>
>
> #include <asm/unaligned.h>
> @@ -859,7 +860,7 @@ struct ceph_readdir_cache_control {
> struct ceph_snap_realm {
> u64 ino;
> struct inode *inode;
> - atomic_t nref;
> + refcount_t nref;
> struct rb_node node;
>
> u64 created, seq;

--
Jeff Layton <jlayton@xxxxxxxxxx>