[ 062/117] ext4: fix ext4_writepages() in presence of truncate

From: Greg Kroah-Hartman
Date: Tue Sep 24 2013 - 20:37:22 EST

Next message: Greg Kroah-Hartman: "[ 055/117] x86/mce: Pay no attention to F bit in MCACOD when parsing UC errors"
Previous message: Greg Kroah-Hartman: "[ 063/117] ext4: simplify truncation code in ext4_setattr()"
In reply to: Greg Kroah-Hartman: "[ 063/117] ext4: simplify truncation code in ext4_setattr()"
Next in thread: Ben Hutchings: "Re: [ 062/117] ext4: fix ext4_writepages() in presence of truncate"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

3.11-stable review patch. If anyone has any objections, please let me know.

------------------

From: Jan Kara <jack@xxxxxxx>

commit 5f1132b2ba8c873f25982cf45917e8455fb6c962 upstream.

Inode size can arbitrarily change while writeback is in progress. When
ext4_writepages() has prepared a long extent for mapping and truncate
then reduces i_size, mpage_map_and_submit_buffers() will always map just
one buffer in a page instead of all of them due to lblk < blocks check.
So we end up not using all blocks we've allocated (thus leaking them)
and also delalloc accounting goes wrong manifesting as a warning like:

ext4_da_release_space:1333: ext4_da_release_space: ino 12, to_free 1
with only 0 reserved data blocks

Note that the problem can happen only when blocksize < pagesize because
otherwise we have only a single buffer in the page.

Fix the problem by removing the size check from the mapping loop. We
have an extent allocated so we have to use it all before checking for
i_size. We also rename add_page_bufs_to_extent() to
mpage_process_page_bufs() and make that function submit the page for IO
if all buffers (upto EOF) in it are mapped.

Reported-by: Dave Jones <davej@xxxxxxxxxx>
Reported-by: Zheng Liu <gnehzuil.liu@xxxxxxxxx>
Signed-off-by: Jan Kara <jack@xxxxxxx>
Signed-off-by: "Theodore Ts'o" <tytso@xxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
fs/ext4/inode.c | 107 ++++++++++++++++++++++++++++++++++----------------------
1 file changed, 66 insertions(+), 41 deletions(-)

--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1890,6 +1890,26 @@ static int ext4_writepage(struct page *p
return ret;
}

+static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
+{
+ int len;
+ loff_t size = i_size_read(mpd->inode);
+ int err;
+
+ BUG_ON(page->index != mpd->first_page);
+ if (page->index == size >> PAGE_CACHE_SHIFT)
+ len = size & ~PAGE_CACHE_MASK;
+ else
+ len = PAGE_CACHE_SIZE;
+ clear_page_dirty_for_io(page);
+ err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
+ if (!err)
+ mpd->wbc->nr_to_write--;
+ mpd->first_page++;
+
+ return err;
+}
+
#define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay))

/*
@@ -1948,12 +1968,29 @@ static bool mpage_add_bh_to_extent(struc
return false;
}

-static bool add_page_bufs_to_extent(struct mpage_da_data *mpd,
- struct buffer_head *head,
- struct buffer_head *bh,
- ext4_lblk_t lblk)
+/*
+ * mpage_process_page_bufs - submit page buffers for IO or add them to extent
+ *
+ * @mpd - extent of blocks for mapping
+ * @head - the first buffer in the page
+ * @bh - buffer we should start processing from
+ * @lblk - logical number of the block in the file corresponding to @bh
+ *
+ * Walk through page buffers from @bh upto @head (exclusive) and either submit
+ * the page for IO if all buffers in this page were mapped and there's no
+ * accumulated extent of buffers to map or add buffers in the page to the
+ * extent of buffers to map. The function returns 1 if the caller can continue
+ * by processing the next page, 0 if it should stop adding buffers to the
+ * extent to map because we cannot extend it anymore. It can also return value
+ * < 0 in case of error during IO submission.
+ */
+static int mpage_process_page_bufs(struct mpage_da_data *mpd,
+ struct buffer_head *head,
+ struct buffer_head *bh,
+ ext4_lblk_t lblk)
{
struct inode *inode = mpd->inode;
+ int err;
ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
>> inode->i_blkbits;

@@ -1963,32 +2000,18 @@ static bool add_page_bufs_to_extent(stru
if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) {
/* Found extent to map? */
if (mpd->map.m_len)
- return false;
+ return 0;
/* Everything mapped so far and we hit EOF */
- return true;
+ break;
}
} while (lblk++, (bh = bh->b_this_page) != head);
- return true;
-}
-
-static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
-{
- int len;
- loff_t size = i_size_read(mpd->inode);
- int err;
-
- BUG_ON(page->index != mpd->first_page);
- if (page->index == size >> PAGE_CACHE_SHIFT)
- len = size & ~PAGE_CACHE_MASK;
- else
- len = PAGE_CACHE_SIZE;
- clear_page_dirty_for_io(page);
- err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
- if (!err)
- mpd->wbc->nr_to_write--;
- mpd->first_page++;
-
- return err;
+ /* So far everything mapped? Submit the page for IO. */
+ if (mpd->map.m_len == 0) {
+ err = mpage_submit_page(mpd, head->b_page);
+ if (err < 0)
+ return err;
+ }
+ return lblk < blocks;
}

/*
@@ -2012,8 +2035,6 @@ static int mpage_map_and_submit_buffers(
struct inode *inode = mpd->inode;
struct buffer_head *head, *bh;
int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
- ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
- >> inode->i_blkbits;
pgoff_t start, end;
ext4_lblk_t lblk;
sector_t pblock;
@@ -2048,18 +2069,26 @@ static int mpage_map_and_submit_buffers(
*/
mpd->map.m_len = 0;
mpd->map.m_flags = 0;
- add_page_bufs_to_extent(mpd, head, bh,
- lblk);
+ /*
+ * FIXME: If dioread_nolock supports
+ * blocksize < pagesize, we need to make
+ * sure we add size mapped so far to
+ * io_end->size as the following call
+ * can submit the page for IO.
+ */
+ err = mpage_process_page_bufs(mpd, head,
+ bh, lblk);
pagevec_release(&pvec);
- return 0;
+ if (err > 0)
+ err = 0;
+ return err;
}
if (buffer_delay(bh)) {
clear_buffer_delay(bh);
bh->b_blocknr = pblock++;
}
clear_buffer_unwritten(bh);
- } while (++lblk < blocks &&
- (bh = bh->b_this_page) != head);
+ } while (lblk++, (bh = bh->b_this_page) != head);

/*
* FIXME: This is going to break if dioread_nolock
@@ -2328,14 +2357,10 @@ static int mpage_prepare_extent_to_map(s
lblk = ((ext4_lblk_t)page->index) <<
(PAGE_CACHE_SHIFT - blkbits);
head = page_buffers(page);
- if (!add_page_bufs_to_extent(mpd, head, head, lblk))
+ err = mpage_process_page_bufs(mpd, head, head, lblk);
+ if (err <= 0)
goto out;
- /* So far everything mapped? Submit the page for IO. */
- if (mpd->map.m_len == 0) {
- err = mpage_submit_page(mpd, page);
- if (err < 0)
- goto out;
- }
+ err = 0;

/*
* Accumulated enough dirty pages? This doesn't apply

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Greg Kroah-Hartman: "[ 055/117] x86/mce: Pay no attention to F bit in MCACOD when parsing UC errors"
Previous message: Greg Kroah-Hartman: "[ 063/117] ext4: simplify truncation code in ext4_setattr()"
In reply to: Greg Kroah-Hartman: "[ 063/117] ext4: simplify truncation code in ext4_setattr()"
Next in thread: Ben Hutchings: "Re: [ 062/117] ext4: fix ext4_writepages() in presence of truncate"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]