Re: [PATCH v2] f2fs: fix out-of-free problem caused by atomic write

From: Chao Yu
Date: Fri Nov 03 2017 - 11:23:44 EST


On 2017/11/3 22:40, Yunlong Song wrote:
> Test:
> Newest kernel source code from f2fs-dev
> 1G zram with f2fs
> 8 threads atomically writing the same file on zram
> there are four kinds of atomic write at the same time:
> 1 no atomic start, with atomic commit
> 2 no atomic start, no atomic commit
> 3 atomic start, with atomic commit
> 4 atomic start, no atomic commit
>
> And I added dump_stack() after the check, as follows:
> +       if ((sbi->user_block_count - valid_user_blocks(sbi)) <
> +               fi->inmem_blocks) {

Does valid_user_blocks() already include fi->inmem_blocks and all reserved new node blocks?

Thanks,

> +               dump_stack();
> +               err = -ENOSPC;
> +               goto drop;
> +       }
>
> then we have:
>
> [  136.237247] F2FS-fs (zram1): Unexpected flush for atomic writes: ino=4, npages=8193
> [  136.952469] CPU: 1 PID: 1274 Comm: atomic_t2 Not tainted 4.14.0-rc4+ #109
> [  136.952947] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014
> [  136.953162] Call Trace:
> [  136.953162]  dump_stack+0x4d/0x6e
> [  136.953162]  commit_inmem_pages+0x258/0x270
> [  136.953162]  ? __sb_start_write+0x48/0x80
> [  136.953162]  ? __mnt_want_write_file+0x18/0x30
> [  136.953162]  f2fs_ioctl+0x1025/0x1e30
> [  136.953162]  ? up_write+0x25/0x30
> [  136.953162]  ? f2fs_file_write_iter+0xf3/0x1e0
> [  136.953162]  ? selinux_file_ioctl+0x114/0x1e0
> [  136.953162]  do_vfs_ioctl+0x96/0x5a0
> [  136.953162]  SyS_ioctl+0x79/0x90
> [  136.953162]  ? SyS_lseek+0x87/0xb0
> [  136.953162]  entry_SYSCALL_64_fastpath+0x13/0x94
> [  136.953162] RIP: 0033:0x434b97
> [  136.953162] RSP: 002b:00007ffc68859de8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010
> [  136.953162] RAX: ffffffffffffffda RBX: 00000000006b78e0 RCX: 0000000000434b97
> [  136.953162] RDX: 00000000006b70e8 RSI: 000000000000f502 RDI: 0000000000000003
> [  136.953162] RBP: 0000000002000010 R08: 00000000006b70e8 R09: 00000000006b7160
> [  136.953162] R10: 0000000000000022 R11: 0000000000000202 R12: 00007f491a1c4010
> [  136.953162] R13: 0000000002001000 R14: 0000000002000000 R15: 00000000006b7938
>
> So I think we should add the check code.
>
> On 2017/11/3 12:48, Yunlong Song wrote:
>> Because I found that it still leads to the out-of-free problem without that check.
>> I traced it and found that the number of data pages being committed for the atomic
>> file can be larger than sbi->user_block_count - valid_user_blocks(sbi), so I added
>> this check.
>>
>> On 2017/11/3 11:46, Jaegeuk Kim wrote:
>>> On 10/30, Yunlong Song wrote:
>>>> f2fs_balance_fs is only invoked once in commit_inmem_pages, but there
>>>> is more than one page to commit, so all the other pages will miss the
>>>> check. This will lead to an out-of-free problem when committing a very large
>>>> file. However, we cannot call f2fs_balance_fs for each inmem page, since
>>>> this would break atomicity. As a result, we should collect prefree
>>>> segments if needed and stop the atomic commit when there are not enough
>>>> available blocks to write the atomic pages.
>>>>
>>>> Signed-off-by: Yunlong Song <yunlong.song@xxxxxxxxxx>
>>>> ---
>>>>   fs/f2fs/f2fs.h    |  1 +
>>>>   fs/f2fs/segment.c | 29 ++++++++++++++++++++++++++++-
>>>>   2 files changed, 29 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index 13a96b8..04ce48f 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -610,6 +610,7 @@ struct f2fs_inode_info {
>>>>       struct list_head inmem_pages;    /* inmemory pages managed by f2fs */
>>>>       struct task_struct *inmem_task;    /* store inmemory task */
>>>>       struct mutex inmem_lock;    /* lock for inmemory pages */
>>>> +    unsigned long inmem_blocks;    /* inmemory blocks */
>>>>       struct extent_tree *extent_tree;    /* cached extent_tree entry */
>>>>       struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */
>>>>       struct rw_semaphore i_mmap_sem;
>>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>>> index 46dfbca..813c110 100644
>>>> --- a/fs/f2fs/segment.c
>>>> +++ b/fs/f2fs/segment.c
>>>> @@ -210,6 +210,7 @@ void register_inmem_page(struct inode *inode, struct page *page)
>>>>           list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
>>>>       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
>>>>       inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
>>>> +    fi->inmem_blocks++;
>>>>       mutex_unlock(&fi->inmem_lock);
>>>>         trace_f2fs_register_inmem_page(page, INMEM);
>>>> @@ -221,6 +222,7 @@ static int __revoke_inmem_pages(struct inode *inode,
>>>>       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>       struct inmem_pages *cur, *tmp;
>>>>       int err = 0;
>>>> +    struct f2fs_inode_info *fi = F2FS_I(inode);
>>>>         list_for_each_entry_safe(cur, tmp, head, list) {
>>>>           struct page *page = cur->page;
>>>> @@ -263,6 +265,7 @@ static int __revoke_inmem_pages(struct inode *inode,
>>>>           list_del(&cur->list);
>>>>           kmem_cache_free(inmem_entry_slab, cur);
>>>>           dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
>>>> +        fi->inmem_blocks--;
>>>>       }
>>>>       return err;
>>>>   }
>>>> @@ -302,6 +305,10 @@ void drop_inmem_pages(struct inode *inode)
>>>>       if (!list_empty(&fi->inmem_ilist))
>>>>           list_del_init(&fi->inmem_ilist);
>>>>       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
>>>> +    if (fi->inmem_blocks) {
>>>> +        f2fs_bug_on(sbi, 1);
>>>> +        fi->inmem_blocks = 0;
>>>> +    }
>>>>       mutex_unlock(&fi->inmem_lock);
>>>>         clear_inode_flag(inode, FI_ATOMIC_FILE);
>>>> @@ -326,6 +333,7 @@ void drop_inmem_page(struct inode *inode, struct page *page)
>>>>         f2fs_bug_on(sbi, !cur || cur->page != page);
>>>>       list_del(&cur->list);
>>>> +    fi->inmem_blocks--;
>>>>       mutex_unlock(&fi->inmem_lock);
>>>>         dec_page_count(sbi, F2FS_INMEM_PAGES);
>>>> @@ -410,11 +418,26 @@ int commit_inmem_pages(struct inode *inode)
>>>>         INIT_LIST_HEAD(&revoke_list);
>>>>       f2fs_balance_fs(sbi, true);
>>>> +    if (prefree_segments(sbi)
>>>> +        && has_not_enough_free_secs(sbi, 0,
>>>> +        fi->inmem_blocks / BLKS_PER_SEC(sbi))) {
>>>> +        struct cp_control cpc;
>>>> +
>>>> +        cpc.reason = __get_cp_reason(sbi);
>>>> +        err = write_checkpoint(sbi, &cpc);
>>>> +        if (err)
>>>> +            goto drop;
>>>> +    }
>>>>       f2fs_lock_op(sbi);
>>>>         set_inode_flag(inode, FI_ATOMIC_COMMIT);
>>>>         mutex_lock(&fi->inmem_lock);
>>>> +    if ((sbi->user_block_count - valid_user_blocks(sbi)) <
>>> What does this mean? We already allocated blocks successfully?
>>>
>>>> +        fi->inmem_blocks) {
>>>> +        err = -ENOSPC;
>>>> +        goto drop;
>>>> +    }
>>>>       err = __commit_inmem_pages(inode, &revoke_list);
>>>>       if (err) {
>>>>           int ret;
>>>> @@ -429,7 +452,7 @@ int commit_inmem_pages(struct inode *inode)
>>>>           ret = __revoke_inmem_pages(inode, &revoke_list, false, true);
>>>>           if (ret)
>>>>               err = ret;
>>>> -
>>>> +drop:
>>>>           /* drop all uncommitted pages */
>>>>           __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
>>>>       }
>>>> @@ -437,6 +460,10 @@ int commit_inmem_pages(struct inode *inode)
>>>>       if (!list_empty(&fi->inmem_ilist))
>>>>           list_del_init(&fi->inmem_ilist);
>>>>       spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
>>>> +    if (fi->inmem_blocks) {
>>>> +        f2fs_bug_on(sbi, 1);
>>>> +        fi->inmem_blocks = 0;
>>>> +    }
>>>>       mutex_unlock(&fi->inmem_lock);
>>>>         clear_inode_flag(inode, FI_ATOMIC_COMMIT);
>>>> --Â
>>>> 1.8.5.2
>>> .
>>>
>>
>