Re: [PATCH v3 26/28] ext4: cleanup transaction restarts during inode deletion

From: Tahsin Erdogan
Date: Tue Jun 20 2017 - 05:36:33 EST


On Wed, Jun 14, 2017 at 5:11 PM, Andreas Dilger <adilger@xxxxxxxxx> wrote:
> Another option that might be less complex is to just add the xattr inodes
> to the orphan list in the main transaction (which should be a fixed number
> of credits), and then truncate/unlink the xattr inodes after the main
> transaction has completed rather than making the transactions arbitrarily
> large. At one point we even had a separate unlink thread to handle this
> in the background to reduce the unlink latency for very large files, which
> also avoids issues with nested transactions.

I think that is true for simple xattr value updates or removals, but
when we delete the parent inode we have to drop references to all
xattr inodes which again becomes arbitrarily large. Also with
deduplication patch that follows, dropping xattr inode reference does
not always cause deletion of the inode so I am not sure whether that
version can be simplified.



On Tue, Jun 20, 2017 at 2:04 AM, Tahsin Erdogan <tahsin@xxxxxxxxxx> wrote:
> During inode deletion, journal credits that will be needed are hard to
> determine, that is why we have journal extend/restart calls in several
> places. Whenever a transaction is restarted, filesystem must be in a
> consistent state because there is no atomicity guarantee beyond a
> restart call.
>
> Add ext4_xattr_ensure_credits() helper function which takes care of
> journal extend/restart logic. It also handles getting jbd2 write access
> and dirty metadata calls. This function is called at every iteration of
> handling an ea_inode reference.
>
> Signed-off-by: Tahsin Erdogan <tahsin@xxxxxxxxxx>
> ---
> v3: fixed checkpatch.pl warnings about long lines and indented label
>
> v2: made ext4_xattr_ensure_credits() static
>
> fs/ext4/inode.c | 66 ++++-----------
> fs/ext4/xattr.c | 258 ++++++++++++++++++++++++++++++++++++--------------------
> fs/ext4/xattr.h | 3 +-
> 3 files changed, 184 insertions(+), 143 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index cf91532765a4..cd007f9757d1 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -239,7 +239,11 @@ void ext4_evict_inode(struct inode *inode)
> */
> sb_start_intwrite(inode->i_sb);
>
> - handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits);
> + if (!IS_NOQUOTA(inode))
> + extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
> +
> + handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
> + ext4_blocks_for_truncate(inode)+extra_credits);
> if (IS_ERR(handle)) {
> ext4_std_error(inode->i_sb, PTR_ERR(handle));
> /*
> @@ -251,36 +255,9 @@ void ext4_evict_inode(struct inode *inode)
> sb_end_intwrite(inode->i_sb);
> goto no_delete;
> }
> +
> if (IS_SYNC(inode))
> ext4_handle_sync(handle);
> -
> - /*
> - * Delete xattr inode before deleting the main inode.
> - */
> - err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array);
> - if (err) {
> - ext4_warning(inode->i_sb,
> - "couldn't delete inode's xattr (err %d)", err);
> - goto stop_handle;
> - }
> -
> - if (!IS_NOQUOTA(inode))
> - extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
> -
> - if (!ext4_handle_has_enough_credits(handle,
> - ext4_blocks_for_truncate(inode) + extra_credits)) {
> - err = ext4_journal_extend(handle,
> - ext4_blocks_for_truncate(inode) + extra_credits);
> - if (err > 0)
> - err = ext4_journal_restart(handle,
> - ext4_blocks_for_truncate(inode) + extra_credits);
> - if (err != 0) {
> - ext4_warning(inode->i_sb,
> - "couldn't extend journal (err %d)", err);
> - goto stop_handle;
> - }
> - }
> -
> inode->i_size = 0;
> err = ext4_mark_inode_dirty(handle, inode);
> if (err) {
> @@ -298,25 +275,17 @@ void ext4_evict_inode(struct inode *inode)
> }
> }
>
> - /*
> - * ext4_ext_truncate() doesn't reserve any slop when it
> - * restarts journal transactions; therefore there may not be
> - * enough credits left in the handle to remove the inode from
> - * the orphan list and set the dtime field.
> - */
> - if (!ext4_handle_has_enough_credits(handle, extra_credits)) {
> - err = ext4_journal_extend(handle, extra_credits);
> - if (err > 0)
> - err = ext4_journal_restart(handle, extra_credits);
> - if (err != 0) {
> - ext4_warning(inode->i_sb,
> - "couldn't extend journal (err %d)", err);
> - stop_handle:
> - ext4_journal_stop(handle);
> - ext4_orphan_del(NULL, inode);
> - sb_end_intwrite(inode->i_sb);
> - goto no_delete;
> - }
> + /* Remove xattr references. */
> + err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array,
> + extra_credits);
> + if (err) {
> + ext4_warning(inode->i_sb, "xattr delete (err %d)", err);
> +stop_handle:
> + ext4_journal_stop(handle);
> + ext4_orphan_del(NULL, inode);
> + sb_end_intwrite(inode->i_sb);
> + ext4_xattr_inode_array_free(ea_inode_array);
> + goto no_delete;
> }
>
> /*
> @@ -342,7 +311,6 @@ void ext4_evict_inode(struct inode *inode)
> ext4_clear_inode(inode);
> else
> ext4_free_inode(handle, inode);
> -
> ext4_journal_stop(handle);
> sb_end_intwrite(inode->i_sb);
> ext4_xattr_inode_array_free(ea_inode_array);
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index e69550e23d64..0484df8dadd1 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -108,6 +108,10 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
> #define EXT4_GET_MB_CACHE(inode) (((struct ext4_sb_info *) \
> inode->i_sb->s_fs_info)->s_mb_cache)
>
> +static int
> +ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
> + struct inode *inode);
> +
> #ifdef CONFIG_LOCKDEP
> void ext4_xattr_inode_set_class(struct inode *ea_inode)
> {
> @@ -653,6 +657,128 @@ static void ext4_xattr_update_super_block(handle_t *handle,
> }
> }
>
> +static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
> + int credits, struct buffer_head *bh,
> + bool dirty, bool block_csum)
> +{
> + int error;
> +
> + if (!ext4_handle_valid(handle))
> + return 0;
> +
> + if (handle->h_buffer_credits >= credits)
> + return 0;
> +
> + error = ext4_journal_extend(handle, credits - handle->h_buffer_credits);
> + if (!error)
> + return 0;
> + if (error < 0) {
> + ext4_warning(inode->i_sb, "Extend journal (error %d)", error);
> + return error;
> + }
> +
> + if (bh && dirty) {
> + if (block_csum)
> + ext4_xattr_block_csum_set(inode, bh);
> + error = ext4_handle_dirty_metadata(handle, NULL, bh);
> + if (error) {
> + ext4_warning(inode->i_sb, "Handle metadata (error %d)",
> + error);
> + return error;
> + }
> + }
> +
> + error = ext4_journal_restart(handle, credits);
> + if (error) {
> + ext4_warning(inode->i_sb, "Restart journal (error %d)", error);
> + return error;
> + }
> +
> + if (bh) {
> + error = ext4_journal_get_write_access(handle, bh);
> + if (error) {
> + ext4_warning(inode->i_sb,
> + "Get write access failed (error %d)",
> + error);
> + return error;
> + }
> + }
> + return 0;
> +}
> +
> +static void
> +ext4_xattr_inode_remove_all(handle_t *handle, struct inode *parent,
> + struct buffer_head *bh,
> + struct ext4_xattr_entry *first, bool block_csum,
> + struct ext4_xattr_inode_array **ea_inode_array,
> + int extra_credits)
> +{
> + struct inode *ea_inode;
> + struct ext4_xattr_entry *entry;
> + bool dirty = false;
> + unsigned int ea_ino;
> + int err;
> + int credits;
> +
> + /* One credit for dec ref on ea_inode, one for orphan list addition, */
> + credits = 2 + extra_credits;
> +
> + for (entry = first; !IS_LAST_ENTRY(entry);
> + entry = EXT4_XATTR_NEXT(entry)) {
> + if (!entry->e_value_inum)
> + continue;
> + ea_ino = le32_to_cpu(entry->e_value_inum);
> + err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
> + if (err)
> + continue;
> +
> + err = ext4_expand_inode_array(ea_inode_array, ea_inode);
> + if (err) {
> + ext4_warning_inode(ea_inode,
> + "Expand inode array err=%d", err);
> + iput(ea_inode);
> + continue;
> + }
> +
> + err = ext4_xattr_ensure_credits(handle, parent, credits, bh,
> + dirty, block_csum);
> + if (err) {
> + ext4_warning_inode(ea_inode, "Ensure credits err=%d",
> + err);
> + continue;
> + }
> +
> + inode_lock(ea_inode);
> + clear_nlink(ea_inode);
> + ext4_orphan_add(handle, ea_inode);
> + inode_unlock(ea_inode);
> +
> + /*
> + * Forget about ea_inode within the same transaction that
> + * decrements the ref count. This avoids duplicate decrements in
> + * case the rest of the work spills over to subsequent
> + * transactions.
> + */
> + entry->e_value_inum = 0;
> + entry->e_value_size = 0;
> +
> + dirty = true;
> + }
> +
> + if (dirty) {
> + /*
> + * Note that we are deliberately skipping csum calculation for
> + * the final update because we do not expect any journal
> + * restarts until xattr block is freed.
> + */
> +
> + err = ext4_handle_dirty_metadata(handle, NULL, bh);
> + if (err)
> + ext4_warning_inode(parent,
> + "handle dirty metadata err=%d", err);
> + }
> +}
> +
> /*
> * Release the xattr block BH: If the reference count is > 1, decrement it;
> * otherwise free the block.
> @@ -1985,42 +2111,6 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
> return 0;
> }
>
> -/**
> - * Add xattr inode to orphan list
> - */
> -static int
> -ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode, int credits,
> - struct ext4_xattr_inode_array *ea_inode_array)
> -{
> - int idx = 0, error = 0;
> - struct inode *ea_inode;
> -
> - if (ea_inode_array == NULL)
> - return 0;
> -
> - for (; idx < ea_inode_array->count; ++idx) {
> - if (!ext4_handle_has_enough_credits(handle, credits)) {
> - error = ext4_journal_extend(handle, credits);
> - if (error > 0)
> - error = ext4_journal_restart(handle, credits);
> -
> - if (error != 0) {
> - ext4_warning(inode->i_sb,
> - "couldn't extend journal "
> - "(err %d)", error);
> - return error;
> - }
> - }
> - ea_inode = ea_inode_array->inodes[idx];
> - inode_lock(ea_inode);
> - ext4_orphan_add(handle, ea_inode);
> - inode_unlock(ea_inode);
> - /* the inode's i_count will be released by caller */
> - }
> -
> - return 0;
> -}
> -
> /*
> * ext4_xattr_delete_inode()
> *
> @@ -2033,16 +2123,23 @@ ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode, int credits,
> */
> int
> ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
> - struct ext4_xattr_inode_array **ea_inode_array)
> + struct ext4_xattr_inode_array **ea_inode_array,
> + int extra_credits)
> {
> struct buffer_head *bh = NULL;
> struct ext4_xattr_ibody_header *header;
> struct ext4_inode *raw_inode;
> - struct ext4_iloc iloc;
> - struct ext4_xattr_entry *entry;
> - struct inode *ea_inode;
> - unsigned int ea_ino;
> - int credits = 3, error = 0;
> + struct ext4_iloc iloc = { .bh = NULL };
> + int error;
> +
> + error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
> + NULL /* bh */,
> + false /* dirty */,
> + false /* block_csum */);
> + if (error) {
> + EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error);
> + goto cleanup;
> + }
>
> if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
> goto delete_external_ea;
> @@ -2050,31 +2147,20 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
> error = ext4_get_inode_loc(inode, &iloc);
> if (error)
> goto cleanup;
> +
> + error = ext4_journal_get_write_access(handle, iloc.bh);
> + if (error)
> + goto cleanup;
> +
> raw_inode = ext4_raw_inode(&iloc);
> header = IHDR(inode, raw_inode);
> - for (entry = IFIRST(header); !IS_LAST_ENTRY(entry);
> - entry = EXT4_XATTR_NEXT(entry)) {
> - if (!entry->e_value_inum)
> - continue;
> - ea_ino = le32_to_cpu(entry->e_value_inum);
> - error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
> - if (error)
> - continue;
> - error = ext4_expand_inode_array(ea_inode_array, ea_inode);
> - if (error) {
> - iput(ea_inode);
> - brelse(iloc.bh);
> - goto cleanup;
> - }
> - entry->e_value_inum = 0;
> - }
> - brelse(iloc.bh);
> + ext4_xattr_inode_remove_all(handle, inode, iloc.bh, IFIRST(header),
> + false /* block_csum */, ea_inode_array,
> + extra_credits);
>
> delete_external_ea:
> if (!EXT4_I(inode)->i_file_acl) {
> - /* add xattr inode to orphan list */
> - error = ext4_xattr_inode_orphan_add(handle, inode, credits,
> - *ea_inode_array);
> + error = 0;
> goto cleanup;
> }
> bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
> @@ -2092,46 +2178,32 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
> goto cleanup;
> }
>
> - for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
> - entry = EXT4_XATTR_NEXT(entry)) {
> - if (!entry->e_value_inum)
> - continue;
> - ea_ino = le32_to_cpu(entry->e_value_inum);
> - error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
> - if (error)
> - continue;
> - error = ext4_expand_inode_array(ea_inode_array, ea_inode);
> - if (error)
> - goto cleanup;
> - entry->e_value_inum = 0;
> - }
> -
> - /* add xattr inode to orphan list */
> - error = ext4_xattr_inode_orphan_add(handle, inode, credits,
> - *ea_inode_array);
> - if (error)
> - goto cleanup;
> -
> - if (!IS_NOQUOTA(inode))
> - credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
> -
> - if (!ext4_handle_has_enough_credits(handle, credits)) {
> - error = ext4_journal_extend(handle, credits);
> - if (error > 0)
> - error = ext4_journal_restart(handle, credits);
> + if (ext4_has_feature_ea_inode(inode->i_sb)) {
> + error = ext4_journal_get_write_access(handle, bh);
> if (error) {
> - ext4_warning(inode->i_sb,
> - "couldn't extend journal (err %d)", error);
> + EXT4_ERROR_INODE(inode, "write access %llu",
> + EXT4_I(inode)->i_file_acl);
> goto cleanup;
> }
> + ext4_xattr_inode_remove_all(handle, inode, bh,
> + BFIRST(bh),
> + true /* block_csum */,
> + ea_inode_array,
> + extra_credits);
> }
>
> ext4_xattr_release_block(handle, inode, bh);
> + /* Update i_file_acl within the same transaction that releases block. */
> EXT4_I(inode)->i_file_acl = 0;
> -
> + error = ext4_mark_inode_dirty(handle, inode);
> + if (error) {
> + EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)",
> + error);
> + goto cleanup;
> + }
> cleanup:
> + brelse(iloc.bh);
> brelse(bh);
> -
> return error;
> }
>
> diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
> index adf761518a73..b2005a2716d9 100644
> --- a/fs/ext4/xattr.h
> +++ b/fs/ext4/xattr.h
> @@ -169,7 +169,8 @@ extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len);
>
> extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino);
> extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
> - struct ext4_xattr_inode_array **array);
> + struct ext4_xattr_inode_array **array,
> + int extra_credits);
> extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
>
> extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
> --
> 2.13.1.518.g3df882009-goog
>