Re: [RFC v3 6/8] ext4: Convert pa->pa_inode_list and pa->pa_obj_lock into a union

From: Jan Kara
Date: Thu Sep 29 2022 - 07:40:36 EST


On Tue 27-09-22 14:46:46, Ojaswin Mujoo wrote:
> ** Splitting pa->pa_inode_list **
>
> Currently, we use the same pa->pa_inode_list to add a pa to either
> the inode preallocation list or the locality group preallocation list.
> For better clarity, split this list into a union of 2 list_heads and use
> either of the them based on the type of pa.
>
> ** Splitting pa->pa_obj_lock **
>
> Currently, pa->pa_obj_lock is either assigned &ei->i_prealloc_lock for
> inode PAs or lg_prealloc_lock for lg PAs, and is then used to lock the
> lists containing these PAs. Make the distinction between the 2 PA types
> clear by changing this lock to a union of 2 locks. Explicitly use the
> pa_lock_node.inode_lock for inode PAs and pa_lock_node.lg_lock for lg
> PAs.
>
> This patch is required so that the locality group preallocation code
> remains the same as in upcoming patches we are going to make changes to
> inode preallocation code to move from list to rbtree based
> implementation. This patch also makes it easier to review the upcoming
> patches.
>
> There are no functional changes in this patch.
>
> Signed-off-by: Ojaswin Mujoo <ojaswin@xxxxxxxxxxxxx>
> Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@xxxxxxxxx>

Nice intermediate step. Feel free to add:

Reviewed-by: Jan Kara <jack@xxxxxxx>

Honza

> ---
> fs/ext4/mballoc.c | 76 +++++++++++++++++++++++++++--------------------
> fs/ext4/mballoc.h | 10 +++++--
> 2 files changed, 52 insertions(+), 34 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index dda9a72c81d9..b91710fe881f 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3995,7 +3995,7 @@ ext4_mb_pa_assert_overlap(struct ext4_allocation_context *ac,
> ext4_lblk_t tmp_pa_start, tmp_pa_end;
>
> rcu_read_lock();
> - list_for_each_entry_rcu(tmp_pa, &ei->i_prealloc_list, pa_inode_list) {
> + list_for_each_entry_rcu(tmp_pa, &ei->i_prealloc_list, pa_node.inode_list) {
> spin_lock(&tmp_pa->pa_lock);
> if (tmp_pa->pa_deleted == 0) {
> tmp_pa_start = tmp_pa->pa_lstart;
> @@ -4033,7 +4033,7 @@ ext4_mb_pa_adjust_overlap(struct ext4_allocation_context *ac,
>
> /* check we don't cross already preallocated blocks */
> rcu_read_lock();
> - list_for_each_entry_rcu(tmp_pa, &ei->i_prealloc_list, pa_inode_list) {
> + list_for_each_entry_rcu(tmp_pa, &ei->i_prealloc_list, pa_node.inode_list) {
> if (tmp_pa->pa_deleted)
> continue;
> spin_lock(&tmp_pa->pa_lock);
> @@ -4410,7 +4410,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
>
> /* first, try per-file preallocation */
> rcu_read_lock();
> - list_for_each_entry_rcu(tmp_pa, &ei->i_prealloc_list, pa_inode_list) {
> + list_for_each_entry_rcu(tmp_pa, &ei->i_prealloc_list, pa_node.inode_list) {
>
> /* all fields in this condition don't change,
> * so we can skip locking for them */
> @@ -4467,7 +4467,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
> for (i = order; i < PREALLOC_TB_SIZE; i++) {
> rcu_read_lock();
> list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[i],
> - pa_inode_list) {
> + pa_node.lg_list) {
> spin_lock(&tmp_pa->pa_lock);
> if (tmp_pa->pa_deleted == 0 &&
> tmp_pa->pa_free >= ac->ac_o_ex.fe_len) {
> @@ -4640,9 +4640,15 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
> list_del(&pa->pa_group_list);
> ext4_unlock_group(sb, grp);
>
> - spin_lock(pa->pa_obj_lock);
> - list_del_rcu(&pa->pa_inode_list);
> - spin_unlock(pa->pa_obj_lock);
> + if (pa->pa_type == MB_INODE_PA) {
> + spin_lock(pa->pa_node_lock.inode_lock);
> + list_del_rcu(&pa->pa_node.inode_list);
> + spin_unlock(pa->pa_node_lock.inode_lock);
> + } else {
> + spin_lock(pa->pa_node_lock.lg_lock);
> + list_del_rcu(&pa->pa_node.lg_list);
> + spin_unlock(pa->pa_node_lock.lg_lock);
> + }
>
> call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
> }
> @@ -4710,7 +4716,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
> pa->pa_len = ac->ac_b_ex.fe_len;
> pa->pa_free = pa->pa_len;
> spin_lock_init(&pa->pa_lock);
> - INIT_LIST_HEAD(&pa->pa_inode_list);
> + INIT_LIST_HEAD(&pa->pa_node.inode_list);
> INIT_LIST_HEAD(&pa->pa_group_list);
> pa->pa_deleted = 0;
> pa->pa_type = MB_INODE_PA;
> @@ -4725,14 +4731,14 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
> ei = EXT4_I(ac->ac_inode);
> grp = ext4_get_group_info(sb, ac->ac_b_ex.fe_group);
>
> - pa->pa_obj_lock = &ei->i_prealloc_lock;
> + pa->pa_node_lock.inode_lock = &ei->i_prealloc_lock;
> pa->pa_inode = ac->ac_inode;
>
> list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
>
> - spin_lock(pa->pa_obj_lock);
> - list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
> - spin_unlock(pa->pa_obj_lock);
> + spin_lock(pa->pa_node_lock.inode_lock);
> + list_add_rcu(&pa->pa_node.inode_list, &ei->i_prealloc_list);
> + spin_unlock(pa->pa_node_lock.inode_lock);
> atomic_inc(&ei->i_prealloc_active);
> }
>
> @@ -4764,7 +4770,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
> pa->pa_len = ac->ac_b_ex.fe_len;
> pa->pa_free = pa->pa_len;
> spin_lock_init(&pa->pa_lock);
> - INIT_LIST_HEAD(&pa->pa_inode_list);
> + INIT_LIST_HEAD(&pa->pa_node.lg_list);
> INIT_LIST_HEAD(&pa->pa_group_list);
> pa->pa_deleted = 0;
> pa->pa_type = MB_GROUP_PA;
> @@ -4780,7 +4786,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
> lg = ac->ac_lg;
> BUG_ON(lg == NULL);
>
> - pa->pa_obj_lock = &lg->lg_prealloc_lock;
> + pa->pa_node_lock.lg_lock = &lg->lg_prealloc_lock;
> pa->pa_inode = NULL;
>
> list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
> @@ -4956,9 +4962,15 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
> list_for_each_entry_safe(pa, tmp, &list, u.pa_tmp_list) {
>
> /* remove from object (inode or locality group) */
> - spin_lock(pa->pa_obj_lock);
> - list_del_rcu(&pa->pa_inode_list);
> - spin_unlock(pa->pa_obj_lock);
> + if (pa->pa_type == MB_GROUP_PA) {
> + spin_lock(pa->pa_node_lock.lg_lock);
> + list_del_rcu(&pa->pa_node.lg_list);
> + spin_unlock(pa->pa_node_lock.lg_lock);
> + } else {
> + spin_lock(pa->pa_node_lock.inode_lock);
> + list_del_rcu(&pa->pa_node.inode_list);
> + spin_unlock(pa->pa_node_lock.inode_lock);
> + }
>
> if (pa->pa_type == MB_GROUP_PA)
> ext4_mb_release_group_pa(&e4b, pa);
> @@ -5021,8 +5033,8 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
> spin_lock(&ei->i_prealloc_lock);
> while (!list_empty(&ei->i_prealloc_list) && needed) {
> pa = list_entry(ei->i_prealloc_list.prev,
> - struct ext4_prealloc_space, pa_inode_list);
> - BUG_ON(pa->pa_obj_lock != &ei->i_prealloc_lock);
> + struct ext4_prealloc_space, pa_node.inode_list);
> + BUG_ON(pa->pa_node_lock.inode_lock != &ei->i_prealloc_lock);
> spin_lock(&pa->pa_lock);
> if (atomic_read(&pa->pa_count)) {
> /* this shouldn't happen often - nobody should
> @@ -5039,7 +5051,7 @@ void ext4_discard_preallocations(struct inode *inode, unsigned int needed)
> if (pa->pa_deleted == 0) {
> ext4_mb_mark_pa_deleted(sb, pa);
> spin_unlock(&pa->pa_lock);
> - list_del_rcu(&pa->pa_inode_list);
> + list_del_rcu(&pa->pa_node.inode_list);
> list_add(&pa->u.pa_tmp_list, &list);
> needed--;
> continue;
> @@ -5331,7 +5343,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
>
> spin_lock(&lg->lg_prealloc_lock);
> list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
> - pa_inode_list,
> + pa_node.lg_list,
> lockdep_is_held(&lg->lg_prealloc_lock)) {
> spin_lock(&pa->pa_lock);
> if (atomic_read(&pa->pa_count)) {
> @@ -5354,7 +5366,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
> ext4_mb_mark_pa_deleted(sb, pa);
> spin_unlock(&pa->pa_lock);
>
> - list_del_rcu(&pa->pa_inode_list);
> + list_del_rcu(&pa->pa_node.lg_list);
> list_add(&pa->u.pa_tmp_list, &discard_list);
>
> total_entries--;
> @@ -5415,7 +5427,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
> /* Add the prealloc space to lg */
> spin_lock(&lg->lg_prealloc_lock);
> list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
> - pa_inode_list,
> + pa_node.lg_list,
> lockdep_is_held(&lg->lg_prealloc_lock)) {
> spin_lock(&tmp_pa->pa_lock);
> if (tmp_pa->pa_deleted) {
> @@ -5424,8 +5436,8 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
> }
> if (!added && pa->pa_free < tmp_pa->pa_free) {
> /* Add to the tail of the previous entry */
> - list_add_tail_rcu(&pa->pa_inode_list,
> - &tmp_pa->pa_inode_list);
> + list_add_tail_rcu(&pa->pa_node.lg_list,
> + &tmp_pa->pa_node.lg_list);
> added = 1;
> /*
> * we want to count the total
> @@ -5436,7 +5448,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
> lg_prealloc_count++;
> }
> if (!added)
> - list_add_tail_rcu(&pa->pa_inode_list,
> + list_add_tail_rcu(&pa->pa_node.lg_list,
> &lg->lg_prealloc_list[order]);
> spin_unlock(&lg->lg_prealloc_lock);
>
> @@ -5492,9 +5504,9 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
> * doesn't grow big.
> */
> if (likely(pa->pa_free)) {
> - spin_lock(pa->pa_obj_lock);
> - list_del_rcu(&pa->pa_inode_list);
> - spin_unlock(pa->pa_obj_lock);
> + spin_lock(pa->pa_node_lock.lg_lock);
> + list_del_rcu(&pa->pa_node.lg_list);
> + spin_unlock(pa->pa_node_lock.lg_lock);
> ext4_mb_add_n_trim(ac);
> }
> }
> @@ -5504,9 +5516,9 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
> * treat per-inode prealloc list as a lru list, then try
> * to trim the least recently used PA.
> */
> - spin_lock(pa->pa_obj_lock);
> - list_move(&pa->pa_inode_list, &ei->i_prealloc_list);
> - spin_unlock(pa->pa_obj_lock);
> + spin_lock(pa->pa_node_lock.inode_lock);
> + list_move(&pa->pa_node.inode_list, &ei->i_prealloc_list);
> + spin_unlock(pa->pa_node_lock.inode_lock);
> }
>
> ext4_mb_put_pa(ac, ac->ac_sb, pa);
> diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
> index dcda2a943cee..398a6688c341 100644
> --- a/fs/ext4/mballoc.h
> +++ b/fs/ext4/mballoc.h
> @@ -114,7 +114,10 @@ struct ext4_free_data {
> };
>
> struct ext4_prealloc_space {
> - struct list_head pa_inode_list;
> + union {
> + struct list_head inode_list; /* for inode PAs */
> + struct list_head lg_list; /* for lg PAs */
> + } pa_node;
> struct list_head pa_group_list;
> union {
> struct list_head pa_tmp_list;
> @@ -128,7 +131,10 @@ struct ext4_prealloc_space {
> ext4_grpblk_t pa_len; /* len of preallocated chunk */
> ext4_grpblk_t pa_free; /* how many blocks are free */
> unsigned short pa_type; /* pa type. inode or group */
> - spinlock_t *pa_obj_lock;
> + union {
> + spinlock_t *inode_lock; /* locks the inode list holding this PA */
> + spinlock_t *lg_lock; /* locks the lg list holding this PA */
> + } pa_node_lock;
> struct inode *pa_inode; /* hack, for history only */
> };
>
> --
> 2.31.1
>
--
Jan Kara <jack@xxxxxxxx>
SUSE Labs, CR