Re: [PATCH 1/3] f2fs: turn off atomic writes when deteting abnormal behaviors

From: Jaegeuk Kim
Date: Mon Jul 23 2018 - 09:03:54 EST


On 07/16, Chao Yu wrote:
> On 2018/7/15 9:11, Jaegeuk Kim wrote:
> > In order to prevent abusing atomic writes by abnormal users, we've added a
> > threshold, 20% over memory footprint, which disallows further atomic writes.
> > Previously, however, SQLite doesn't know the files became normal, so that
> > it could write stale data and commit on revoked normal database file.
> >
> > Once f2fs detects such the abnormal behavior, this patch simply disables
> > all the atomic operations such as:
> > - write_begin() gives EINVAL to avoid stale data writes, and SQLite will call
> > F2FS_IOC_ABORT_VOLATILE_WRITE to notify aborting the transaction,
> > - F2FS_IOC_START_ATOMIC_WRITE gives EINVAL for SQLite to fall back normal
> > journal_mode,
> > - F2FS_IOC_COMMIT_ATOMIC_WRITE gives EINVAL, if the file was revoked, so that
> > Framework retries to submit the transaction given propagated SQLite error.
> >
> > Note that, this patch also turns off atomic operations, if foreground GC tries
> > to move atomic files too frequently.
>
> Well, how about just keeping original implementation: shutdown atomic write for
> those files which are really affect fggc? Since intention of the original
> behavior is targeting to abnormal atomic write usage, e.g. open atomic_write
> file for very long time, then fggc will be blocked each time when moving its
> block. So shutdown it, fggc will recover.

The point here is stopping sqlite to keep going wrong data writes even after
we already revoked blocks.

Thanks,

>
> Thanks,
>
> >
> > Signed-off-by: Jaegeuk Kim <jaegeuk@xxxxxxxxxx>
> > ---
> > fs/f2fs/data.c | 7 ++++---
> > fs/f2fs/f2fs.h | 4 ++--
> > fs/f2fs/file.c | 6 +++++-
> > fs/f2fs/gc.c | 4 +---
> > fs/f2fs/segment.c | 21 +++++++++++----------
> > 5 files changed, 23 insertions(+), 19 deletions(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index 5e53d210e222..c9e75aa61c24 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -2247,8 +2247,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
> > trace_f2fs_write_begin(inode, pos, len, flags);
> >
> > if (f2fs_is_atomic_file(inode) &&
> > - !f2fs_available_free_memory(sbi, INMEM_PAGES)) {
> > - err = -ENOMEM;
> > + (is_sbi_flag_set(sbi, SBI_DISABLE_ATOMIC_WRITE) ||
> > + !f2fs_available_free_memory(sbi, INMEM_PAGES))) {
> > + err = -EINVAL;
> > drop_atomic = true;
> > goto fail;
> > }
> > @@ -2331,7 +2332,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
> > f2fs_put_page(page, 1);
> > f2fs_write_failed(mapping, pos + len);
> > if (drop_atomic)
> > - f2fs_drop_inmem_pages_all(sbi, false);
> > + f2fs_disable_atomic_write(sbi);
> > return err;
> > }
> >
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 4d8b1de83143..1d5c8d543eda 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -621,7 +621,6 @@ enum {
> >
> > enum {
> > GC_FAILURE_PIN,
> > - GC_FAILURE_ATOMIC,
> > MAX_GC_FAILURE
> > };
> >
> > @@ -1066,6 +1065,7 @@ enum {
> > SBI_POR_DOING, /* recovery is doing or not */
> > SBI_NEED_SB_WRITE, /* need to recover superblock */
> > SBI_NEED_CP, /* need to checkpoint */
> > + SBI_DISABLE_ATOMIC_WRITE, /* turn off atomic write */
> > };
> >
> > enum {
> > @@ -2833,7 +2833,7 @@ void f2fs_destroy_node_manager_caches(void);
> > */
> > bool f2fs_need_SSR(struct f2fs_sb_info *sbi);
> > void f2fs_register_inmem_page(struct inode *inode, struct page *page);
> > -void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure);
> > +void f2fs_disable_atomic_write(struct f2fs_sb_info *sbi);
> > void f2fs_drop_inmem_pages(struct inode *inode);
> > void f2fs_drop_inmem_page(struct inode *inode, struct page *page);
> > int f2fs_commit_inmem_pages(struct inode *inode);
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 6880c6f78d58..b029a4ed3bb0 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -1682,6 +1682,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
> > if (!S_ISREG(inode->i_mode))
> > return -EINVAL;
> >
> > + if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_DISABLE_ATOMIC_WRITE))
> > + return -EINVAL;
> > +
> > ret = mnt_want_write_file(filp);
> > if (ret)
> > return ret;
> > @@ -1750,7 +1753,6 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
> > ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
> > if (!ret) {
> > clear_inode_flag(inode, FI_ATOMIC_FILE);
> > - F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
> > stat_dec_atomic_write(inode);
> > }
> > } else {
> > @@ -1853,6 +1855,8 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
> > ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
> > }
> >
> > + clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
> > +
> > inode_unlock(inode);
> >
> > mnt_drop_write_file(filp);
> > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> > index 9093be6e7a7d..6d762f3cdfc7 100644
> > --- a/fs/f2fs/gc.c
> > +++ b/fs/f2fs/gc.c
> > @@ -629,7 +629,6 @@ static void move_data_block(struct inode *inode, block_t bidx,
> > goto out;
> >
> > if (f2fs_is_atomic_file(inode)) {
> > - F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
> > F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
> > goto out;
> > }
> > @@ -745,7 +744,6 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
> > goto out;
> >
> > if (f2fs_is_atomic_file(inode)) {
> > - F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
> > F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
> > goto out;
> > }
> > @@ -1100,7 +1098,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
> > if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
> > if (skipped_round > MAX_SKIP_ATOMIC_COUNT &&
> > skipped_round * 2 >= round)
> > - f2fs_drop_inmem_pages_all(sbi, true);
> > + f2fs_disable_atomic_write(sbi);
> > segno = NULL_SEGNO;
> > goto gc_more;
> > }
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 9efce174c51a..05877a2f1894 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -274,11 +274,14 @@ static int __revoke_inmem_pages(struct inode *inode,
> > return err;
> > }
> >
> > -void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
> > +void f2fs_disable_atomic_write(struct f2fs_sb_info *sbi)
> > {
> > struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
> > struct inode *inode;
> > struct f2fs_inode_info *fi;
> > +
> > + /* just turn it off */
> > + set_sbi_flag(sbi, SBI_DISABLE_ATOMIC_WRITE);
> > next:
> > spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
> > if (list_empty(head)) {
> > @@ -290,17 +293,16 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
> > spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
> >
> > if (inode) {
> > - if (gc_failure) {
> > - if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
> > - goto drop;
> > - goto skip;
> > + inode_lock(inode);
> > + /* need to check whether it was already revoked */
> > + if (f2fs_is_atomic_file(inode)) {
> > + f2fs_drop_inmem_pages(inode);
> > + set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
> > }
> > -drop:
> > - set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
> > - f2fs_drop_inmem_pages(inode);
> > + inode_unlock(inode);
> > iput(inode);
> > }
> > -skip:
> > +
> > congestion_wait(BLK_RW_ASYNC, HZ/50);
> > cond_resched();
> > goto next;
> > @@ -320,7 +322,6 @@ void f2fs_drop_inmem_pages(struct inode *inode)
> > mutex_unlock(&fi->inmem_lock);
> >
> > clear_inode_flag(inode, FI_ATOMIC_FILE);
> > - fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
> > stat_dec_atomic_write(inode);
> > }
> >
> >