[PATCH 5/7] f2fs: set fsync mark only for the last dnode

From: Jaegeuk Kim
Date: Mon Apr 25 2016 - 20:07:37 EST


In order to give atomic writes, we should consider power failure during
sync_node_pages in fsync.
So, this patch marks fsync flag only in the last dnode block.

Signed-off-by: Jaegeuk Kim <jaegeuk@xxxxxxxxxx>
---
fs/f2fs/f2fs.h | 4 +-
fs/f2fs/file.c | 14 +++++--
fs/f2fs/node.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++----
fs/f2fs/recovery.c | 9 ++---
4 files changed, 113 insertions(+), 20 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 269abe5..ca828b0 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -159,7 +159,6 @@ struct fsync_inode_entry {
struct inode *inode; /* vfs inode pointer */
block_t blkaddr; /* block address locating the last fsync */
block_t last_dentry; /* block address locating the last dentry */
- block_t last_inode; /* block address locating the last inode */
};

#define nats_in_cursum(jnl) (le16_to_cpu(jnl->n_nats))
@@ -1784,7 +1783,8 @@ void ra_node_page(struct f2fs_sb_info *, nid_t);
struct page *get_node_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_node_page_ra(struct page *, int);
void sync_inode_page(struct dnode_of_data *);
-int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
+int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *,
+ bool);
int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *);
bool alloc_nid(struct f2fs_sb_info *, nid_t *);
void alloc_nid_done(struct f2fs_sb_info *, nid_t);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 60fd64c..dc47d5c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -182,7 +182,8 @@ static void try_to_fix_pino(struct inode *inode)
}
}

-int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
+static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
+ int datasync, bool atomic)
{
struct inode *inode = file->f_mapping->host;
struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -256,7 +257,7 @@ go_write:
goto out;
}
sync_nodes:
- ret = fsync_node_pages(sbi, ino, &wbc);
+ ret = fsync_node_pages(sbi, ino, &wbc, atomic);
if (ret)
goto out;

@@ -290,6 +291,11 @@ out:
return ret;
}

+int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ return f2fs_do_sync_file(file, start, end, datasync, false);
+}
+
static pgoff_t __get_first_dirty_index(struct address_space *mapping,
pgoff_t pgofs, int whence)
{
@@ -1407,7 +1413,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
}
}

- ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
+ ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
err_out:
mnt_drop_write_file(filp);
return ret;
@@ -1465,7 +1471,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
drop_inmem_pages(inode);
if (f2fs_is_volatile_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
- ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
+ ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}

mnt_drop_write_file(filp);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 8a1e211..de070a5 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1222,13 +1222,81 @@ iput_out:
iput(inode);
}

+static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ pgoff_t index, end;
+ struct pagevec pvec;
+ struct page *last_page = NULL;
+
+ pagevec_init(&pvec, 0);
+ index = 0;
+ end = ULONG_MAX;
+
+ while (index <= end) {
+ int i, nr_pages;
+ nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
+ PAGECACHE_TAG_DIRTY,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ if (nr_pages == 0)
+ break;
+
+ for (i = 0; i < nr_pages; i++) {
+ struct page *page = pvec.pages[i];
+
+ if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_put_page(last_page, 0);
+ pagevec_release(&pvec);
+ return ERR_PTR(-EIO);
+ }
+
+ if (!IS_DNODE(page) || !is_cold_node(page))
+ continue;
+ if (ino_of_node(page) != ino)
+ continue;
+
+ lock_page(page);
+
+ if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
+continue_unlock:
+ unlock_page(page);
+ continue;
+ }
+ if (ino_of_node(page) != ino)
+ goto continue_unlock;
+
+ if (!PageDirty(page)) {
+ /* someone wrote it for us */
+ goto continue_unlock;
+ }
+
+ if (last_page)
+ f2fs_put_page(last_page, 0);
+
+ get_page(page);
+ last_page = page;
+ unlock_page(page);
+ }
+ pagevec_release(&pvec);
+ cond_resched();
+ }
+ return last_page;
+}
+
int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
- struct writeback_control *wbc)
+ struct writeback_control *wbc, bool atomic)
{
pgoff_t index, end;
struct pagevec pvec;
int ret = 0;
+ struct page *last_page = NULL;
+ bool marked = false;

+ if (atomic) {
+ last_page = last_fsync_dnode(sbi, ino);
+ if (IS_ERR_OR_NULL(last_page))
+ return PTR_ERR_OR_ZERO(last_page);
+ }
+retry:
pagevec_init(&pvec, 0);
index = 0;
end = ULONG_MAX;
@@ -1245,6 +1313,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
struct page *page = pvec.pages[i];

if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_put_page(last_page, 0);
pagevec_release(&pvec);
return -EIO;
}
@@ -1264,33 +1333,54 @@ continue_unlock:
if (ino_of_node(page) != ino)
goto continue_unlock;

- if (!PageDirty(page)) {
+ if (!PageDirty(page) && page != last_page) {
/* someone wrote it for us */
goto continue_unlock;
}

f2fs_wait_on_page_writeback(page, NODE, true);
BUG_ON(PageWriteback(page));
- if (!clear_page_dirty_for_io(page))
- goto continue_unlock;

- set_fsync_mark(page, 1);
- if (IS_INODE(page))
- set_dentry_mark(page,
+ if (!atomic || page == last_page) {
+ set_fsync_mark(page, 1);
+ if (IS_INODE(page))
+ set_dentry_mark(page,
need_dentry_mark(sbi, ino));
+ /* may be written by other thread */
+ if (!PageDirty(page))
+ set_page_dirty(page);
+ }
+
+ if (!clear_page_dirty_for_io(page))
+ goto continue_unlock;

ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc);
if (ret) {
unlock_page(page);
+ f2fs_put_page(last_page, 0);
+ break;
+ }
+ if (page == last_page) {
+ f2fs_put_page(page, 0);
+ marked = true;
break;
}
}
pagevec_release(&pvec);
cond_resched();

- if (ret)
+ if (ret || marked)
break;
}
+ if (!ret && atomic && !marked) {
+ f2fs_msg(sbi->sb, KERN_DEBUG,
+ "Retry to write fsync mark: ino=%u, idx=%lx",
+ ino, last_page->index);
+ lock_page(last_page);
+ set_page_dirty(last_page);
+ unlock_page(last_page);
+ goto retry;
+ }
return ret ? -EIO: 0;
}

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 2c87c12..a646d3b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -257,11 +257,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
}
entry->blkaddr = blkaddr;

- if (IS_INODE(page)) {
- entry->last_inode = blkaddr;
- if (is_dent_dnode(page))
- entry->last_dentry = blkaddr;
- }
+ if (IS_INODE(page) && is_dent_dnode(page))
+ entry->last_dentry = blkaddr;
next:
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
@@ -521,7 +518,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
* In this case, we can lose the latest inode(x).
* So, call recover_inode for the inode update.
*/
- if (entry->last_inode == blkaddr)
+ if (IS_INODE(page))
recover_inode(entry->inode, page);
if (entry->last_dentry == blkaddr) {
err = recover_dentry(entry->inode, page);
--
2.6.3