Re: [patch] call truncate_inode_pages in the DIO fallback tobuffered I/O path

From: Andrew Morton
Date: Fri Oct 06 2006 - 16:12:38 EST


On Thu, 05 Oct 2006 15:31:43 -0400
Jeff Moyer <jmoyer@xxxxxxxxxx> wrote:

> akpm> I'd propose that we do this via
>
> akpm> generic_file_buffered_write(...);
> akpm> do_sync_file_range(..., SYNC_FILE_RANGE_WAIT_BEFORE|
> akpm> SYNC_FILE_RANGE_WRITE|
> akpm> SYNC_FILE_RANGE_WAIT_AFTER)
>
> akpm> invalidate_mapping_pages(...);
>
> OK, patch attached.

I mangled your patch rather a lot.

- coding style (multiple declarations and multiple assignments)

- `endbyte' should be loff_t, not pgoff_t.

- sync the region (pos, pos+written), not (pos, pos+count).

- attempt to return the correct value from __generic_file_aio_write_nolock
in all circumstances.

- Avoid testing the O_DIRECT flag twice - just call
generic_file_buffered_write() from two sites. Simpler and cleaner that way.


Patch is below. The end result looks like:


/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
if (unlikely(file->f_flags & O_DIRECT)) {
loff_t endbyte;
ssize_t written_buffered;

written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
ppos, count, ocount);
if (written < 0 || written == count)
goto out;
/*
* direct-io write to a hole: fall through to buffered I/O
* for completing the rest of the request.
*/
pos += written;
count -= written;
written_buffered = generic_file_buffered_write(iocb, iov,
nr_segs, pos, ppos, count,
written);

/*
* We need to ensure that the page cache pages are written to
* disk and invalidated to preserve the expected O_DIRECT
* semantics.
*/
endbyte = pos + written_buffered - 1;
err = do_sync_file_range(file, pos, endbyte,
SYNC_FILE_RANGE_WAIT_BEFORE|
SYNC_FILE_RANGE_WRITE|
SYNC_FILE_RANGE_WAIT_AFTER);
if (err == 0) {
written += written_buffered;
invalidate_mapping_pages(mapping,
pos >> PAGE_CACHE_SHIFT,
endbyte >> PAGE_CACHE_SHIFT);
} else {
/*
* We don't know how much we wrote, so just return
* the number of bytes which were direct-written
*/
}
} else {
written = generic_file_buffered_write(iocb, iov, nr_segs,
pos, ppos, count, written);
}
out:
current->backing_dev_info = NULL;
return written ? written : err;
}


Which I think is closer to correct, but boy it needs a lot of reviewing and
testing.



diff -puN mm/filemap.c~direct-io-sync-and-invalidate-file-region-when-falling-back-to-buffered-write-fixes mm/filemap.c
--- a/mm/filemap.c~direct-io-sync-and-invalidate-file-region-when-falling-back-to-buffered-write-fixes
+++ a/mm/filemap.c
@@ -2228,7 +2228,7 @@ __generic_file_aio_write_nolock(struct k
struct inode *inode = mapping->host;
unsigned long seg;
loff_t pos;
- ssize_t written, written_direct;
+ ssize_t written;
ssize_t err;

ocount = 0;
@@ -2258,7 +2258,7 @@ __generic_file_aio_write_nolock(struct k

/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
- written = written_direct = 0;
+ written = 0;

err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
if (err)
@@ -2275,8 +2275,11 @@ __generic_file_aio_write_nolock(struct k

/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
if (unlikely(file->f_flags & O_DIRECT)) {
- written = generic_file_direct_write(iocb, iov,
- &nr_segs, pos, ppos, count, ocount);
+ loff_t endbyte;
+ ssize_t written_buffered;
+
+ written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
+ ppos, count, ocount);
if (written < 0 || written == count)
goto out;
/*
@@ -2285,31 +2288,34 @@ __generic_file_aio_write_nolock(struct k
*/
pos += written;
count -= written;
+ written_buffered = generic_file_buffered_write(iocb, iov,
+ nr_segs, pos, ppos, count,
+ written);

- written_direct = written;
- }
-
- written = generic_file_buffered_write(iocb, iov, nr_segs,
- pos, ppos, count, written);
-
- /*
- * When falling through to buffered I/O, we need to ensure that the
- * page cache pages are written to disk and invalidated to preserve
- * the expected O_DIRECT semantics.
- */
- if (unlikely(file->f_flags & O_DIRECT)) {
- pgoff_t endbyte = pos + count - 1;
-
+ /*
+ * We need to ensure that the page cache pages are written to
+ * disk and invalidated to preserve the expected O_DIRECT
+ * semantics.
+ */
+ endbyte = pos + written_buffered - 1;
err = do_sync_file_range(file, pos, endbyte,
SYNC_FILE_RANGE_WAIT_BEFORE|
SYNC_FILE_RANGE_WRITE|
SYNC_FILE_RANGE_WAIT_AFTER);
- if (err == 0)
+ if (err == 0) {
+ written += written_buffered;
invalidate_mapping_pages(mapping,
pos >> PAGE_CACHE_SHIFT,
endbyte >> PAGE_CACHE_SHIFT);
- else
- written = written_direct;
+ } else {
+ /*
+ * We don't know how much we wrote, so just return
+ * the number of bytes which were direct-written
+ */
+ }
+ } else {
+ written = generic_file_buffered_write(iocb, iov, nr_segs,
+ pos, ppos, count, written);
}
out:
current->backing_dev_info = NULL;
_

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/