[patch 5/7] VFS: add super operation writeback_inodes

From: Edward Shishkin
Date: Mon Feb 01 2010 - 20:56:43 EST


. add ->writeback_inodes() super operation.

This patch adds a new operation, writeback_inodes, to struct
super_operations, together with a generic implementation
(generic_writeback_sb_inodes), and changes
fs-writeback.c:writeback_inodes_wb() to call the filesystem's
writeback_inodes if it is defined, or the generic implementation
otherwise. This new operation allows a filesystem to decide
for itself what to flush.

Reiser4 flushes dirty pages on the basis of atoms, not inodes.
writeback_inodes_wb used to call the address space flushing
method (writepages) for every dirty inode. For reiser4 this
meant committing atoms unnecessarily often, which caused a
substantial slowdown. Having this method helped
to fix that problem.

. add vfs library function writeback_skip_sb_inodes()

This function is for file systems which have their own means
of periodic write-out of old data.

Signed-off-by: Edward Shishkin <edward.shishkin@xxxxxxxxx>
---
fs/fs-writeback.c | 47 ++++++++++++++++++++++++++++++++++++++++++----
include/linux/fs.h | 10 +++++++++
include/linux/writeback.h | 6 +++++
3 files changed, 59 insertions(+), 4 deletions(-)

Index: linux-2.6.33-rc5-mm1/include/linux/fs.h
===================================================================
--- linux-2.6.33-rc5-mm1.orig/include/linux/fs.h
+++ linux-2.6.33-rc5-mm1/include/linux/fs.h
@@ -514,6 +514,7 @@ enum positive_aop_returns {
struct page;
struct address_space;
struct writeback_control;
+struct bdi_writeback;

struct iov_iter {
const struct iovec *iov;
@@ -1565,6 +1566,9 @@ struct super_operations {
int (*remount_fs) (struct super_block *, int *, char *);
void (*clear_inode) (struct inode *);
void (*umount_begin) (struct super_block *);
+ int (*writeback_inodes)(struct super_block *sb,
+ struct bdi_writeback *wb,
+ struct writeback_control *wbc);

int (*show_options)(struct seq_file *, struct vfsmount *);
int (*show_stats)(struct seq_file *, struct vfsmount *);
@@ -2073,6 +2077,12 @@ extern int invalidate_inode_pages2(struc
extern int invalidate_inode_pages2_range(struct address_space *mapping,
pgoff_t start, pgoff_t end);
extern int write_inode_now(struct inode *, int);
+extern void writeback_skip_sb_inodes(struct super_block *sb,
+ struct bdi_writeback *wb);
+extern void writeback_inodes_wbc(struct writeback_control *wbc);
+extern int generic_writeback_sb_inodes(struct super_block *sb,
+ struct bdi_writeback *wb,
+ struct writeback_control *wbc);
extern int filemap_fdatawrite(struct address_space *);
extern int filemap_flush(struct address_space *);
extern int filemap_fdatawait(struct address_space *);
Index: linux-2.6.33-rc5-mm1/include/linux/writeback.h
===================================================================
--- linux-2.6.33-rc5-mm1.orig/include/linux/writeback.h
+++ linux-2.6.33-rc5-mm1/include/linux/writeback.h
@@ -13,6 +13,12 @@ extern spinlock_t inode_lock;
extern struct list_head inode_in_use;
extern struct list_head inode_unused;

+static inline int is_flush_bd_task(struct task_struct *task)
+{
+ return task->flags & PF_FLUSHER; /* nonzero iff PF_FLUSHER is set */
+}
+#define current_is_flush_bd_task() is_flush_bd_task(current)
+
/*
* fs/fs-writeback.c
*/
Index: linux-2.6.33-rc5-mm1/fs/fs-writeback.c
===================================================================
--- linux-2.6.33-rc5-mm1.orig/fs/fs-writeback.c
+++ linux-2.6.33-rc5-mm1/fs/fs-writeback.c
@@ -605,9 +605,9 @@ static enum sb_pin_state pin_sb_for_writ
* Return 1, if the caller writeback routine should be
* interrupted. Otherwise return 0.
*/
-static int writeback_sb_inodes(struct super_block *sb,
- struct bdi_writeback *wb,
- struct writeback_control *wbc)
+int generic_writeback_sb_inodes(struct super_block *sb,
+ struct bdi_writeback *wb,
+ struct writeback_control *wbc)
{
while (!list_empty(&wb->b_io)) {
long pages_skipped;
@@ -658,6 +658,32 @@ static int writeback_sb_inodes(struct su
/* b_io is empty */
return 1;
}
+EXPORT_SYMBOL(generic_writeback_sb_inodes);
+
+/*
+ * Skip the trailing run of @wb->b_io inodes that belong to @sb, for file
+ * systems which have their own means of periodic write-out of old data.
+ * NOTE: inode_lock should be held.
+ *
+ * Walks b_io from its tail, passing each inode to redirty_tail(), and
+ * stops when b_io is empty or the tail inode belongs to another sb.
+ */
+void writeback_skip_sb_inodes(struct super_block *sb,
+ struct bdi_writeback *wb)
+{
+ while (1) {
+ struct inode *inode;
+
+ if (list_empty(&wb->b_io))
+ break;
+ inode = list_entry(wb->b_io.prev, struct inode, i_list);
+ if (sb != inode->i_sb)
+ break;
+ redirty_tail(inode);
+ }
+}
+EXPORT_SYMBOL(writeback_skip_sb_inodes);
+

static void writeback_inodes_wb(struct bdi_writeback *wb,
struct writeback_control *wbc)
@@ -687,7 +713,10 @@ static void writeback_inodes_wb(struct b
requeue_io(inode);
continue;
}
- ret = writeback_sb_inodes(sb, wb, wbc);
+ if (sb->s_op->writeback_inodes)
+ ret = sb->s_op->writeback_inodes(sb, wb, wbc);
+ else
+ ret = generic_writeback_sb_inodes(sb, wb, wbc);

if (state == SB_PINNED)
unpin_sb_for_writeback(sb);
@@ -704,6 +733,7 @@ void writeback_inodes_wbc(struct writeba

writeback_inodes_wb(&bdi->wb, wbc);
}
+EXPORT_SYMBOL(writeback_inodes_wbc);

/*
* The maximum number of pages to writeout in a single bdi flush/kupdate
@@ -1289,3 +1319,12 @@ int sync_inode(struct inode *inode, stru
return ret;
}
EXPORT_SYMBOL(sync_inode);
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * mode-name: "LC"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 79
+ * End:
+ */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/