[PATCH v3 4/5] mm: Scan for dirty ptes and update cmtime on MS_ASYNC

From: Andy Lutomirski
Date: Fri Aug 16 2013 - 20:16:38 EST


This is probably unimportant but improves POSIX compliance.

Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
---
mm/msync.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 72 insertions(+), 11 deletions(-)

diff --git a/mm/msync.c b/mm/msync.c
index 632df45..9e41acd 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -13,13 +13,16 @@
#include <linux/file.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
+#include <linux/rmap.h>
+#include <linux/pagemap.h>

/*
* MS_SYNC syncs the entire file - including mappings.
*
* MS_ASYNC does not start I/O (it used to, up to 2.5.67).
* Nor does it marks the relevant pages dirty (it used to up to 2.6.17).
- * Now it doesn't do anything, since dirty pages are properly tracked.
+ * Now all it does is ensure that file timestamps get updated, since POSIX
+ * requires it. We track dirty pages correctly without MS_ASYNC.
*
* The application may now run fsync() to
* write out the dirty pages and wait on the writeout and check the result.
@@ -28,6 +31,57 @@
* So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
* applications.
*/
+
+static int msync_async_range(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long end)
+{
+ struct mm_struct *mm;
+ struct address_space *mapping;
+ int iters = 0;
+
+ while (*start < end && *start < vma->vm_end && iters < 128) {
+ unsigned int page_mask, page_increm;
+
+ /*
+ * Require that the pte be writable (because otherwise it can't
+ * be dirty, so there's nothing to clean).
+ *
+ * In theory we could check the pte dirty bit, but this is
+ * awkward and barely worth it.
+ */
+ struct page *page = follow_page_mask(vma, *start,
+ FOLL_GET | FOLL_WRITE,
+ &page_mask);
+
+ if (page && !IS_ERR(page)) {
+ if (lock_page_killable(page) == 0) {
+ page_mkclean(page);
+ unlock_page(page);
+ }
+ put_page(page);
+ }
+
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ page_increm = 1 + (~(*start >> PAGE_SHIFT) & page_mask);
+ *start += page_increm * PAGE_SIZE;
+ cond_resched();
+ iters++;
+ }
+
+ /* XXX: try to do this only once? */
+ mapping = vma->vm_file->f_mapping;
+ if (mapping->a_ops->flush_cmtime)
+ mapping->a_ops->flush_cmtime(mapping);
+
+ /* Give mmap_sem writers a chance. */
+ mm = current->mm;
+ up_read(&mm->mmap_sem);
+ down_read(&mm->mmap_sem);
+ return 0;
+}
+
SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
{
unsigned long end;
@@ -77,18 +131,25 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
goto out_unlock;
}
file = vma->vm_file;
- start = vma->vm_end;
- if ((flags & MS_SYNC) && file &&
- (vma->vm_flags & VM_SHARED)) {
- get_file(file);
- up_read(&mm->mmap_sem);
- error = vfs_fsync(file, 0);
- fput(file);
- if (error || start >= end)
- goto out;
- down_read(&mm->mmap_sem);
+ if (file && vma->vm_flags & VM_SHARED) {
+ if (flags & MS_SYNC) {
+ start = vma->vm_end;
+ get_file(file);
+ up_read(&mm->mmap_sem);
+ error = vfs_fsync(file, 0);
+ fput(file);
+ if (error || start >= end)
+ goto out;
+ down_read(&mm->mmap_sem);
+ } else if ((vma->vm_flags & VM_WRITE) &&
+ file->f_mapping) {
+ error = msync_async_range(vma, &start, end);
+ } else {
+ start = vma->vm_end;
+ }
vma = find_vma(mm, start);
} else {
+ start = vma->vm_end;
if (start >= end) {
error = 0;
goto out_unlock;
--
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/