[PATCH 6/7] ext4: Convert ext4 to new mkwrite code

From: Jan Kara
Date: Thu Sep 17 2009 - 11:22:39 EST

Next message: Cyrill Gorcunov: "Re: [PATCH v3 1/5] x86: fix fragile computation of vsyscall address"
Previous message: Jan Kara: "[PATCH 2/7] fs: Remove zeroing from nobh_writepage"
In reply to: Jan Kara: "[PATCH 2/7] fs: Remove zeroing from nobh_writepage"
Next in thread: Jan Kara: "[PATCH 3/7] ext4: Deprecate nobh mount option"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

Signed-off-by: Jan Kara <jack@xxxxxxx>
---
fs/ext4/ext4.h | 2 -
fs/ext4/extents.c | 4 -
fs/ext4/inode.c | 239 +++++++++++++++++++++++------------------------------
3 files changed, 102 insertions(+), 143 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9714db3..9110114 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1374,8 +1374,6 @@ extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
-extern int ext4_block_truncate_page(handle_t *handle,
- struct address_space *mapping, loff_t from);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t ext4_get_reserved_space(struct inode *inode);

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 73ebfb4..76ece1b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2996,7 +2996,6 @@ out2:

void ext4_ext_truncate(struct inode *inode)
{
- struct address_space *mapping = inode->i_mapping;
struct super_block *sb = inode->i_sb;
ext4_lblk_t last_block;
handle_t *handle;
@@ -3010,9 +3009,6 @@ void ext4_ext_truncate(struct inode *inode)
if (IS_ERR(handle))
return;

- if (inode->i_size & (sb->s_blocksize - 1))
- ext4_block_truncate_page(handle, mapping, inode->i_size);
-
if (ext4_orphan_add(handle, inode))
goto out_stop;

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be25874..1df027c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1451,6 +1451,53 @@ static int do_journal_get_write_access(handle_t *handle,
return ext4_journal_get_write_access(handle, bh);
}

+/*
+ * If this write creates a hole in [from, to), zero the part of the block
+ * containing 'from' that lies under the hole. Returns 0 or an error code.
+ */
+static int ext4_prepare_hole(handle_t *handle, struct inode *inode,
+ loff_t from, loff_t to, unsigned flags)
+{
+ struct buffer_head *bh;
+ unsigned hole_start, hole_len;
+ unsigned bsize = 1 << inode->i_blkbits;
+ int ret = 0;
+
+ /* No hole created? */
+ if (from >= to)
+ return 0;
+
+ bh = block_prepare_hole_bh(inode, from, to, flags, ext4_get_block);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+ if (!bh)
+ return 0;
+
+ if (ext4_should_journal_data(inode)) {
+ ret = ext4_journal_get_write_access(handle, bh);
+ if (ret)
+ goto out;
+ }
+ /* Zero from 'from' up to the end of its block or 'to', whichever is first */
+ hole_start = from & (PAGE_CACHE_SIZE - 1);
+ if (to > ALIGN(from, bsize))
+ hole_len = bsize - (hole_start & (bsize - 1));
+ else
+ hole_len = to - from;
+
+ zero_user(bh->b_page, hole_start, hole_len);
+ if (ext4_should_order_data(inode)) {
+ ret = ext4_jbd2_file_inode(handle, inode);
+ mark_buffer_dirty(bh);
+ } else if (ext4_should_journal_data(inode))
+ ret = ext4_handle_dirty_metadata(handle, inode, bh);
+ else
+ mark_buffer_dirty(bh);
+out:
+ brelse(bh);
+ return ret;
+}
+
static int ext4_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -1465,10 +1512,13 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,

trace_ext4_write_begin(inode, pos, len, flags);
/*
- * Reserve one block more for addition to orphan list in case
- * we allocate blocks but write fails for some reason
+ * Reserve one block for addition to orphan list in case
+ * we allocate blocks but write fails for some reason and
+ * one block for zeroed block in case we create hole
+ * (needed only if we journal data)
*/
- needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
+ needed_blocks = ext4_writepage_trans_blocks(inode) + 1 +
+ (pos > inode->i_size && ext4_should_journal_data(inode));
index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -1484,6 +1534,12 @@ retry:
* started */
flags |= AOP_FLAG_NOFS;

+ ret = ext4_prepare_hole(handle, inode, inode->i_size, pos, flags);
+ if (ret) {
+ ext4_journal_stop(handle);
+ goto out;
+ }
+
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
ext4_journal_stop(handle);
@@ -1548,23 +1604,17 @@ static int ext4_generic_write_end(struct file *file,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- int i_size_changed = 0;
struct inode *inode = mapping->host;
handle_t *handle = ext4_journal_current_handle();
+ loff_t old_i_size = inode->i_size;

copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);

- /*
- * No need to use i_size_read() here, the i_size
- * cannot change under us because we hold i_mutex.
- *
- * But it's important to update i_size while still holding page lock:
- * page writeout could otherwise come in and zero beyond i_size.
- */
- if (pos + copied > inode->i_size) {
+ unlock_page(page);
+ page_cache_release(page);
+
+ if (pos + copied > inode->i_size)
i_size_write(inode, pos + copied);
- i_size_changed = 1;
- }

if (pos + copied > EXT4_I(inode)->i_disksize) {
/* We need to mark inode dirty even if
@@ -1572,19 +1622,9 @@ static int ext4_generic_write_end(struct file *file,
* bu greater than i_disksize.(hint delalloc)
*/
ext4_update_i_disksize(inode, (pos + copied));
- i_size_changed = 1;
- }
- unlock_page(page);
- page_cache_release(page);
-
- /*
- * Don't mark the inode dirty under page lock. First, it unnecessarily
- * makes the holding time of page lock longer. Second, it forces lock
- * ordering of page lock and transaction start for journaling
- * filesystems.
- */
- if (i_size_changed)
ext4_mark_inode_dirty(handle, inode);
+ }
+ block_finish_hole(inode, old_i_size, pos);

return copied;
}
@@ -1691,7 +1731,7 @@ static int ext4_journalled_write_end(struct file *file,
int ret = 0, ret2;
int partial = 0;
unsigned from, to;
- loff_t new_i_size;
+ loff_t new_i_size, old_i_size = inode->i_size;

trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1707,6 +1747,10 @@ static int ext4_journalled_write_end(struct file *file,
to, &partial, write_end_fn);
if (!partial)
SetPageUptodate(page);
+
+ unlock_page(page);
+ page_cache_release(page);
+
new_i_size = pos + copied;
if (new_i_size > inode->i_size)
i_size_write(inode, pos+copied);
@@ -1718,8 +1762,8 @@ static int ext4_journalled_write_end(struct file *file,
ret = ret2;
}

- unlock_page(page);
- page_cache_release(page);
+ block_finish_hole(inode, old_i_size, pos);
+
if (pos + len > inode->i_size && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied
* less. We will have blocks allocated outside
@@ -2967,6 +3011,12 @@ retry:
* started */
flags |= AOP_FLAG_NOFS;

+ ret = ext4_prepare_hole(handle, inode, inode->i_size, pos, flags);
+ if (ret) {
+ ext4_journal_stop(handle);
+ goto out;
+ }
+
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
ext4_journal_stop(handle);
@@ -3434,100 +3484,6 @@ void ext4_set_aops(struct inode *inode)
}

/*
- * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
- * up to the end of the block which corresponds to `from'.
- * This required during truncate. We need to physically zero the tail end
- * of that block so it doesn't yield old data if the file is later grown.
- */
-int ext4_block_truncate_page(handle_t *handle,
- struct address_space *mapping, loff_t from)
-{
- ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
- unsigned blocksize, length, pos;
- ext4_lblk_t iblock;
- struct inode *inode = mapping->host;
- struct buffer_head *bh;
- struct page *page;
- int err = 0;
-
- page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
- mapping_gfp_mask(mapping) & ~__GFP_FS);
- if (!page)
- return -EINVAL;
-
- blocksize = inode->i_sb->s_blocksize;
- length = blocksize - (offset & (blocksize - 1));
- iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
-
- if (!page_has_buffers(page))
- create_empty_buffers(page, blocksize, 0);
-
- /* Find the buffer that contains "offset" */
- bh = page_buffers(page);
- pos = blocksize;
- while (offset >= pos) {
- bh = bh->b_this_page;
- iblock++;
- pos += blocksize;
- }
-
- err = 0;
- if (buffer_freed(bh)) {
- BUFFER_TRACE(bh, "freed: skip");
- goto unlock;
- }
-
- if (!buffer_mapped(bh)) {
- BUFFER_TRACE(bh, "unmapped");
- ext4_get_block(inode, iblock, bh, 0);
- /* unmapped? It's a hole - nothing to do */
- if (!buffer_mapped(bh)) {
- BUFFER_TRACE(bh, "still unmapped");
- goto unlock;
- }
- }
-
- /* Ok, it's mapped. Make sure it's up-to-date */
- if (PageUptodate(page))
- set_buffer_uptodate(bh);
-
- if (!buffer_uptodate(bh)) {
- err = -EIO;
- ll_rw_block(READ, 1, &bh);
- wait_on_buffer(bh);
- /* Uhhuh. Read error. Complain and punt. */
- if (!buffer_uptodate(bh))
- goto unlock;
- }
-
- if (ext4_should_journal_data(inode)) {
- BUFFER_TRACE(bh, "get write access");
- err = ext4_journal_get_write_access(handle, bh);
- if (err)
- goto unlock;
- }
-
- zero_user(page, offset, length);
-
- BUFFER_TRACE(bh, "zeroed end of block");
-
- err = 0;
- if (ext4_should_journal_data(inode)) {
- err = ext4_handle_dirty_metadata(handle, inode, bh);
- } else {
- if (ext4_should_order_data(inode))
- err = ext4_jbd2_file_inode(handle, inode);
- mark_buffer_dirty(bh);
- }
-
-unlock:
- unlock_page(page);
- page_cache_release(page);
- return err;
-}
-
-/*
* Probably it should be a library function... search for first non-zero word
* or memcmp with zero_page, whatever is better for particular architecture.
* Linus?
@@ -3932,7 +3888,6 @@ void ext4_truncate(struct inode *inode)
struct ext4_inode_info *ei = EXT4_I(inode);
__le32 *i_data = ei->i_data;
int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- struct address_space *mapping = inode->i_mapping;
ext4_lblk_t offsets[4];
Indirect chain[4];
Indirect *partial;
@@ -3960,10 +3915,6 @@ void ext4_truncate(struct inode *inode)
last_block = (inode->i_size + blocksize-1)
>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);

- if (inode->i_size & (blocksize - 1))
- if (ext4_block_truncate_page(handle, mapping, inode->i_size))
- goto out_stop;
-
n = ext4_block_to_path(inode, last_block, offsets, NULL);
if (n == 0)
goto out_stop; /* error */
@@ -4744,16 +4695,30 @@ static int ext4_setsize(struct inode *inode, loff_t newsize)
goto err_out;
}
}
- } else if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ } else {
+ if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

- if (newsize > sbi->s_bitmap_maxbytes) {
- error = -EFBIG;
- goto out;
+ if (newsize > sbi->s_bitmap_maxbytes) {
+ error = -EFBIG;
+ goto out;
+ }
}
+ handle = ext4_journal_start(inode, 1);
+ if (IS_ERR(handle)) {
+ error = PTR_ERR(handle);
+ goto err_out;
+ }
+ error = ext4_prepare_hole(handle, inode, oldsize, newsize,
+ AOP_FLAG_NOFS);
+ ext4_journal_stop(handle);
+ if (error)
+ goto err_out;
}

i_size_write(inode, newsize);
+ if (newsize > oldsize)
+ block_finish_hole(inode, oldsize, newsize);
truncate_pagecache(inode, oldsize, newsize);
ext4_truncate(inode);

@@ -5301,10 +5266,10 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
}
/*
* OK, we need to fill the hole... Do write_begin write_end
- * to do block allocation/reservation.We are not holding
- * inode.i__mutex here. That allow * parallel write_begin,
- * write_end call. lock_page prevent this from happening
- * on the same page though
+ * to do block allocation/reservation. We are not holding
+ * inode->i_mutex here. That allows parallel write_begin,
+ * write_end calls. lock_page prevents this from happening
+ * on the same page though.
*/
ret = mapping->a_ops->write_begin(file, mapping, page_offset(page),
len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
--
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Cyrill Gorcunov: "Re: [PATCH v3 1/5] x86: fix fragile computation of vsyscall address"
Previous message: Jan Kara: "[PATCH 2/7] fs: Remove zeroing from nobh_writepage"
In reply to: Jan Kara: "[PATCH 2/7] fs: Remove zeroing from nobh_writepage"
Next in thread: Jan Kara: "[PATCH 3/7] ext4: Deprecate nobh mount option"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]