Re: [PATCH v3] f2fs: fix out-of-free problem caused by atomic write

From: Chao Yu
Date: Mon Nov 06 2017 - 21:17:46 EST


On 2017/11/7 9:55, Jaegeuk Kim wrote:
> On 11/06, Yunlong Song wrote:
>> f2fs_balance_fs is only invoked once in commit_inmem_pages, but there
>> is more than one page to commit, so all the other pages will miss the
>> check. This will lead to an out-of-free-space problem when committing a
>> very large file. However, we cannot call f2fs_balance_fs for each inmem
>> page, since this would break atomicity. As a result, we should collect
>> prefree segments if needed.
>>
>> Signed-off-by: Yunlong Song <yunlong.song@xxxxxxxxxx>
>> ---
>> fs/f2fs/f2fs.h | 1 +
>> fs/f2fs/segment.c | 24 +++++++++++++++++++++++-
>> 2 files changed, 24 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>> index 13a96b8..04ce48f 100644
>> --- a/fs/f2fs/f2fs.h
>> +++ b/fs/f2fs/f2fs.h
>> @@ -610,6 +610,7 @@ struct f2fs_inode_info {
>> struct list_head inmem_pages; /* inmemory pages managed by f2fs */
>> struct task_struct *inmem_task; /* store inmemory task */
>> struct mutex inmem_lock; /* lock for inmemory pages */
>> + unsigned long inmem_blocks; /* inmemory blocks */
>> struct extent_tree *extent_tree; /* cached extent_tree entry */
>> struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */
>> struct rw_semaphore i_mmap_sem;
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 46dfbca..b87ede4 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -210,6 +210,7 @@ void register_inmem_page(struct inode *inode, struct page *page)
>> list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
>> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
>> inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
>> + fi->inmem_blocks++;
>> mutex_unlock(&fi->inmem_lock);
>>
>> trace_f2fs_register_inmem_page(page, INMEM);
>> @@ -221,6 +222,7 @@ static int __revoke_inmem_pages(struct inode *inode,
>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>> struct inmem_pages *cur, *tmp;
>> int err = 0;
>> + struct f2fs_inode_info *fi = F2FS_I(inode);
>>
>> list_for_each_entry_safe(cur, tmp, head, list) {
>> struct page *page = cur->page;
>> @@ -263,6 +265,7 @@ static int __revoke_inmem_pages(struct inode *inode,
>> list_del(&cur->list);
>> kmem_cache_free(inmem_entry_slab, cur);
>> dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
>> + fi->inmem_blocks--;
>> }
>> return err;
>> }
>> @@ -302,6 +305,10 @@ void drop_inmem_pages(struct inode *inode)
>> if (!list_empty(&fi->inmem_ilist))
>> list_del_init(&fi->inmem_ilist);
>> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
>> + if (fi->inmem_blocks) {
>> + f2fs_bug_on(sbi, 1);
>> + fi->inmem_blocks = 0;
>> + }
>> mutex_unlock(&fi->inmem_lock);
>>
>> clear_inode_flag(inode, FI_ATOMIC_FILE);
>> @@ -326,6 +333,7 @@ void drop_inmem_page(struct inode *inode, struct page *page)
>>
>> f2fs_bug_on(sbi, !cur || cur->page != page);
>> list_del(&cur->list);
>> + fi->inmem_blocks--;
>> mutex_unlock(&fi->inmem_lock);
>>
>> dec_page_count(sbi, F2FS_INMEM_PAGES);
>> @@ -410,6 +418,16 @@ int commit_inmem_pages(struct inode *inode)
>>
>> INIT_LIST_HEAD(&revoke_list);
>> f2fs_balance_fs(sbi, true);
>> + if (prefree_segments(sbi)
>> + && has_not_enough_free_secs(sbi, 0,
>> + fi->inmem_blocks / BLKS_PER_SEC(sbi))) {
>> + struct cp_control cpc;
>> +
>> + cpc.reason = __get_cp_reason(sbi);
>> + err = write_checkpoint(sbi, &cpc);
>> + if (err)
>> + goto drop;
>
> What do you want to guarantee with this? How about passing target # of segments
> into f2fs_balance_fs() so that f2fs_gc() could secure wanted free space in a
> loop?

Agreed, Jaegeuk. IMO, later we can add one more dirty type, F2FS_DIRTY_BUDGET,
to enum count_type, introduce the functions below, and call them around the
places that dirty node/dent/imeta data.

/*
 * Account @dirty_budget in the F2FS_DIRTY_BUDGET counter of @sbi (skipped
 * when the budget is zero), then call f2fs_balance_fs().
 *
 * Intended to be paired with f2fs_release_budget() once the budgeted data
 * has been handled.
 *
 * NOTE(review): f2fs_balance_fs() currently takes a boolean "need" flag, so
 * a non-zero budget is treated as "need"; passing the budget as a target
 * amount presumably requires changing that function as well -- confirm.
 */
void f2fs_budget_space(struct f2fs_sb_info *sbi, unsigned int dirty_budget)
{
	/* Kernel atomics take the value first and the atomic_t pointer second. */
	if (dirty_budget)
		atomic_add(dirty_budget, &sbi->nr_pages[F2FS_DIRTY_BUDGET]);

	f2fs_balance_fs(sbi, dirty_budget);
}

/*
 * Subtract @dirty_budget from the F2FS_DIRTY_BUDGET counter of @sbi.
 *
 * atomic_dec() only decrements by one and takes no amount, so atomic_sub()
 * is used to release the whole budget in one step.
 */
void f2fs_release_budget(struct f2fs_sb_info *sbi, unsigned int dirty_budget)
{
	atomic_sub(dirty_budget, &sbi->nr_pages[F2FS_DIRTY_BUDGET]);
}

That way, has_not_enough_free_secs can account for all dirty data, including
data of the F2FS_DIRTY_BUDGET type, more precisely.

How about that?

Thanks,

>
> Thanks,
>
>> + }
>> f2fs_lock_op(sbi);
>>
>> set_inode_flag(inode, FI_ATOMIC_COMMIT);
>> @@ -429,7 +447,7 @@ int commit_inmem_pages(struct inode *inode)
>> ret = __revoke_inmem_pages(inode, &revoke_list, false, true);
>> if (ret)
>> err = ret;
>> -
>> +drop:
>> /* drop all uncommitted pages */
>> __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
>> }
>> @@ -437,6 +455,10 @@ int commit_inmem_pages(struct inode *inode)
>> if (!list_empty(&fi->inmem_ilist))
>> list_del_init(&fi->inmem_ilist);
>> spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
>> + if (fi->inmem_blocks) {
>> + f2fs_bug_on(sbi, 1);
>> + fi->inmem_blocks = 0;
>> + }
>> mutex_unlock(&fi->inmem_lock);
>>
>> clear_inode_flag(inode, FI_ATOMIC_COMMIT);
>> --
>> 1.8.5.2
>
> .
>