[RFC/PATCH 2/5] mm/fs: execute in place (V2)

From: Carsten Otte
Date: Wed May 18 2005 - 09:17:11 EST


[RFC/PATCH 2/5] mm/fs: execute in place (V2)
This patch adds a new address space operation called get_xip_page, which
works similarly to readpage/writepage but returns a reference to the struct
page for the on-disk data for the given page. The returned page is expected
to be up-to-date.

In response to feedback on the previous version, filemap.c has this time
been split into three files:
- mm/filemap.h contains some inline functions, moved here from mm/filemap.c,
that are called in both filemap.c and filemap_xip.c. It also defines macros
that check whether execute in place should be used for a given object.
If no filesystems with xip support are compiled into the kernel
(CONFIG_FS_XIP not set), those macros expand to 0. Otherwise they expand to
the corresponding checks.
- mm/filemap.c now contains more or less its "classic" functionality.
However, the above macros are used to call the xip functions if xip is
enabled at compile time and the address space has get_xip_page. In addition,
some inline functions have been moved out to filemap.h.
- mm/filemap_xip.c now contains all xip-related functions, which lived in
filemap.c in the previous version of the patch.
This addresses two issues:
- code path is unchanged for kernels that do not have any xip filesystems
enabled at compile time
- filemap.c stays as readable as it was to avoid headaches reading it

Signed-off-by: Carsten Otte <cotte@xxxxxxxxxx>
---
diff -ruN linux-git/fs/open.c linux-git-xip/fs/open.c
--- linux-git/fs/open.c 2005-05-17 14:23:32.000000000 +0200
+++ linux-git-xip/fs/open.c 2005-05-17 18:33:57.750457896 +0200
@@ -807,7 +807,9 @@

/* NB: we're sure to have correct a_ops only after f_op->open */
if (f->f_flags & O_DIRECT) {
- if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) {
+ if (!f->f_mapping->a_ops ||
+ ((!f->f_mapping->a_ops->direct_IO) &&
+ (!f->f_mapping->a_ops->get_xip_page))) {
fput(f);
f = ERR_PTR(-EINVAL);
}
diff -ruN linux-git/include/linux/fs.h linux-git-xip/include/linux/fs.h
--- linux-git/include/linux/fs.h 2005-05-17 18:01:33.000000000 +0200
+++ linux-git-xip/include/linux/fs.h 2005-05-17 18:33:57.753457440 +0200
@@ -330,6 +330,8 @@
int (*releasepage) (struct page *, int);
ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
loff_t offset, unsigned long nr_segs);
+ struct page* (*get_xip_page)(struct address_space *, sector_t,
+ int);
};

struct backing_dev_info;
@@ -1473,14 +1475,19 @@
unsigned long *, loff_t, loff_t *, size_t, size_t);
extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
unsigned long, loff_t, loff_t *, size_t, ssize_t);
+extern ssize_t generic_file_xip_write(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t, loff_t *, size_t);
extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos);
extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
extern void do_generic_mapping_read(struct address_space *mapping,
- struct file_ra_state *, struct file *,
- loff_t *, read_descriptor_t *, read_actor_t);
+ struct file_ra_state *, struct file *,
+ loff_t *, read_descriptor_t *, read_actor_t);
+extern void do_xip_mapping_read (struct address_space *mapping,
+ struct file_ra_state *, struct file *,
+ loff_t *, read_descriptor_t *, read_actor_t);
extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb,
@@ -1494,17 +1501,32 @@
extern loff_t remote_llseek(struct file *file, loff_t offset, int origin);
extern int generic_file_open(struct inode * inode, struct file * filp);
extern int nonseekable_open(struct inode * inode, struct file * filp);
+extern int xip_truncate_page(struct address_space *mapping, loff_t from);
+/* true if the file's mapping has get_xip_page; constant 0 without FS_XIP */
+#ifdef CONFIG_FS_XIP
+#define file_is_xip(file) unlikely((file)->f_mapping->a_ops->get_xip_page)
+#else
+#define file_is_xip(file) 0
+#endif

static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
read_descriptor_t * desc,
read_actor_t actor)
{
- do_generic_mapping_read(filp->f_mapping,
- &filp->f_ra,
- filp,
- ppos,
- desc,
- actor);
+ if (file_is_xip(filp))
+ do_xip_mapping_read(filp->f_mapping,
+ &filp->f_ra,
+ filp,
+ ppos,
+ desc,
+ actor);
+ else
+ do_generic_mapping_read(filp->f_mapping,
+ &filp->f_ra,
+ filp,
+ ppos,
+ desc,
+ actor);
}

ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
diff -ruN linux-git/mm/Makefile linux-git-xip/mm/Makefile
--- linux-git/mm/Makefile 2005-05-17 14:23:36.000000000 +0200
+++ linux-git-xip/mm/Makefile 2005-05-17 18:33:57.754457288 +0200
@@ -18,3 +18,4 @@
obj-$(CONFIG_SHMEM) += shmem.o
obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o

+obj-$(CONFIG_FS_XIP) += filemap_xip.o
diff -ruN linux-git/mm/filemap.c linux-git-xip/mm/filemap.c
--- linux-git/mm/filemap.c 2005-05-17 14:23:36.000000000 +0200
+++ linux-git-xip/mm/filemap.c 2005-05-17 18:33:57.757456832 +0200
@@ -28,6 +28,7 @@
#include <linux/blkdev.h>
#include <linux/security.h>
#include <linux/syscalls.h>
+#include "filemap.h"
/*
* FIXME: remove all knowledge of the buffer layer from the core VM
*/
@@ -968,6 +969,7 @@
ssize_t retval;
unsigned long seg;
size_t count;
+ int xip = file_is_xip(filp) ? 1 : 0;

count = 0;
for (seg = 0; seg < nr_segs; seg++) {
@@ -990,7 +992,9 @@
}

/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
- if (filp->f_flags & O_DIRECT) {
+ /* do not use generic_file_direct_IO on xip files, xip IO is
+ implicitly direct as well */
+ if (filp->f_flags & O_DIRECT && !xip) {
loff_t pos = *ppos, size;
struct address_space *mapping;
struct inode *inode;
@@ -1110,6 +1114,9 @@
do_readahead(struct address_space *mapping, struct file *filp,
unsigned long index, unsigned long nr)
{
+ if (mapping_is_xip_save(mapping))
+ return 0;
+
if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
return -EINVAL;

@@ -1538,10 +1545,13 @@
{
struct address_space *mapping = file->f_mapping;

- if (!mapping->a_ops->readpage)
+ if ((!mapping->a_ops->readpage) && (!mapping_is_xip(mapping)))
return -ENOEXEC;
file_accessed(file);
- vma->vm_ops = &generic_file_vm_ops;
+ if (mapping_is_xip(mapping))
+ vma->vm_ops = &xip_file_vm_ops;
+ else
+ vma->vm_ops = &generic_file_vm_ops;
return 0;
}
EXPORT_SYMBOL(filemap_populate);
@@ -1714,32 +1724,7 @@
}
EXPORT_SYMBOL(remove_suid);

-/*
- * Copy as much as we can into the page and return the number of bytes which
- * were sucessfully copied. If a fault is encountered then clear the page
- * out to (offset+bytes) and return the number of bytes which were copied.
- */
-static inline size_t
-filemap_copy_from_user(struct page *page, unsigned long offset,
- const char __user *buf, unsigned bytes)
-{
- char *kaddr;
- int left;
-
- kaddr = kmap_atomic(page, KM_USER0);
- left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
- kunmap_atomic(kaddr, KM_USER0);
-
- if (left != 0) {
- /* Do it the slow way */
- kaddr = kmap(page);
- left = __copy_from_user(kaddr + offset, buf, bytes);
- kunmap(page);
- }
- return bytes - left;
-}
-
-static size_t
+size_t
__filemap_copy_from_user_iovec(char *vaddr,
const struct iovec *iov, size_t base, size_t bytes)
{
@@ -1767,52 +1752,6 @@
}

/*
- * This has the same sideeffects and return value as filemap_copy_from_user().
- * The difference is that on a fault we need to memset the remainder of the
- * page (out to offset+bytes), to emulate filemap_copy_from_user()'s
- * single-segment behaviour.
- */
-static inline size_t
-filemap_copy_from_user_iovec(struct page *page, unsigned long offset,
- const struct iovec *iov, size_t base, size_t bytes)
-{
- char *kaddr;
- size_t copied;
-
- kaddr = kmap_atomic(page, KM_USER0);
- copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
- base, bytes);
- kunmap_atomic(kaddr, KM_USER0);
- if (copied != bytes) {
- kaddr = kmap(page);
- copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
- base, bytes);
- kunmap(page);
- }
- return copied;
-}
-
-static inline void
-filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
-{
- const struct iovec *iov = *iovp;
- size_t base = *basep;
-
- while (bytes) {
- int copy = min(bytes, iov->iov_len - base);
-
- bytes -= copy;
- base += copy;
- if (iov->iov_len == base) {
- iov++;
- base = 0;
- }
- }
- *iovp = iov;
- *basep = base;
-}
-
-/*
* Performs necessary checks before doing a write
*
* Can adjust writing position aor amount of bytes to write.
@@ -2123,6 +2062,13 @@

inode_update_time(inode, 1);

+ if (file_is_xip(file)) {
+ /* use execute in place to copy directly to disk */
+ written = generic_file_xip_write (iocb, iov,
+ nr_segs, pos, ppos, count);
+ goto out;
+ }
+
/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
if (unlikely(file->f_flags & O_DIRECT)) {
written = generic_file_direct_write(iocb, iov,
diff -ruN linux-git/mm/filemap.h linux-git-xip/mm/filemap.h
--- linux-git/mm/filemap.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-git-xip/mm/filemap.h 2005-05-17 18:33:57.792451512 +0200
@@ -0,0 +1,141 @@
+/*
+ * linux/mm/filemap.h
+ *
+ * Copyright (C) 2005 IBM Corporation
+ * Author: Carsten Otte <cotte@xxxxxxxxxx>
+ *
+ * derived from linux/mm/filemap.c
+ * Copyright (C) Linus Torvalds
+ */
+
+#ifndef __FILEMAP_H
+#define __FILEMAP_H
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/uio.h>
+#include <linux/config.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_FS_XIP
+extern struct vm_operations_struct xip_file_vm_ops;
+
+void
+do_xip_mapping_read(struct address_space *mapping,
+ struct file_ra_state *_ra,
+ struct file *filp,
+ loff_t *ppos,
+ read_descriptor_t *desc,
+ read_actor_t actor);
+
+void
+__filemap_xip_unmap (struct address_space * mapping,
+ unsigned long pgoff);
+
+struct page *
+filemap_xip_nopage(struct vm_area_struct * area,
+ unsigned long address,
+ int *type);
+
+ssize_t
+generic_file_xip_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos, loff_t *ppos,
+ size_t count);
+
+int
+xip_truncate_page(struct address_space *mapping,
+ loff_t from);
+
+#define mapping_is_xip(map) unlikely((map)->a_ops->get_xip_page)
+#define mapping_is_xip_save(map) unlikely((map) && (map)->a_ops \
+	&& (map)->a_ops->get_xip_page)	/* NULL-tolerant variant */
+#else /* not defined CONFIG_FS_XIP: the checks are constant 0 */
+#define mapping_is_xip(map) 0
+#define mapping_is_xip_save(map) 0
+#define do_xip_mapping_read(arg1, arg2, arg3, arg4, arg5, arg6) BUG()
+#define xip_truncate_page(map, from) BUG()
+#endif /* defined CONFIG_FS_XIP */
+
+extern struct vm_operations_struct xip_file_vm_ops;
+
+size_t
+__filemap_copy_from_user_iovec(char *vaddr,
+ const struct iovec *iov,
+ size_t base,
+ size_t bytes);
+
+/*
+ * Copy as much as we can into the page and return the number of bytes which
+ * were successfully copied: atomically first, then again through kmap() if
+ * that came up short.  NOTE(review): clearing the page out to (offset+bytes)
+ * on a fault is presumably left to __copy_from_user - confirm.
+ */
+static inline size_t
+filemap_copy_from_user(struct page *page, unsigned long offset,
+			const char __user *buf, unsigned bytes)
+{
+	char *dst;
+	int uncopied;
+
+	dst = kmap_atomic(page, KM_USER0);
+	uncopied = __copy_from_user_inatomic(dst + offset, buf, bytes);
+	kunmap_atomic(dst, KM_USER0);
+	if (uncopied == 0)
+		return bytes;
+	/* Do it the slow way */
+	dst = kmap(page);
+	uncopied = __copy_from_user(dst + offset, buf, bytes);
+	kunmap(page);
+	return bytes - uncopied;
+}
+
+/*
+ * Multi-segment counterpart of filemap_copy_from_user(), with the same return
+ * value: an atomic copy is tried first and redone through kmap() if it came
+ * up short.  NOTE(review): the memset of the remainder of the page (out to
+ * offset+bytes) on a fault presumably happens in
+ * __filemap_copy_from_user_iovec() - confirm.
+ */
+static inline size_t
+filemap_copy_from_user_iovec(struct page *page, unsigned long offset,
+			const struct iovec *iov, size_t base, size_t bytes)
+{
+	char *dst;
+	size_t done;
+
+	dst = kmap_atomic(page, KM_USER0);
+	done = __filemap_copy_from_user_iovec(dst + offset, iov, base, bytes);
+	kunmap_atomic(dst, KM_USER0);
+	if (done == bytes)
+		return done;
+
+	dst = kmap(page);
+	done = __filemap_copy_from_user_iovec(dst + offset, iov, base, bytes);
+	kunmap(page);
+	return done;
+}
+
+/* Step the iovec cursor (*iovp, *basep) forward by the given byte count. */
+static inline void
+filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
+{
+	const struct iovec *seg = *iovp;
+	size_t seg_off = *basep;
+
+	while (bytes) {
+		int step = min(bytes, seg->iov_len - seg_off);
+		seg_off += step;
+		bytes -= step;
+		if (seg_off == seg->iov_len) {
+			seg_off = 0;
+			seg++;
+		}
+	}
+	*basep = seg_off;
+	*iovp = seg;
+}
+
+
+#endif
diff -ruN linux-git/mm/filemap_xip.c linux-git-xip/mm/filemap_xip.c
--- linux-git/mm/filemap_xip.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-git-xip/mm/filemap_xip.c 2005-05-17 18:33:57.794451208 +0200
@@ -0,0 +1,388 @@
+/*
+ * linux/mm/filemap_xip.c
+ *
+ * Copyright (C) 2005 IBM Corporation
+ * Author: Carsten Otte <cotte@xxxxxxxxxx>
+ *
+ * derived from linux/mm/filemap.c - Copyright (C) Linus Torvalds
+ *
+ */
+
+#include <linux/pagemap.h>
+#include <linux/module.h>
+#include <linux/uio.h>
+#include "filemap.h"
+/* vm_ops for xip mappings: the nopage handler is filemap_xip_nopage(). */
+struct vm_operations_struct xip_file_vm_ops = {
+ .nopage = filemap_xip_nopage,
+};
+
+
+/*
+ * Generic file read routine for execute in place files.  Each page is
+ * looked up with mapping->a_ops->get_xip_page() and then passed to the
+ * actor.
+ *
+ * On return *ppos has been advanced by the number of bytes the actor
+ * reported as used.  Failures are reported in desc->error: -EIO when
+ * get_xip_page() returns NULL, otherwise the error it encoded.  An
+ * -ENODATA result is not an error; the empty_zero_page is used instead.
+ *
+ * _ra is not referenced.  filp is only handed to file_accessed() and may
+ * be NULL.
+ */
+void
+do_xip_mapping_read(struct address_space *mapping,
+		    struct file_ra_state *_ra,
+		    struct file *filp,
+		    loff_t *ppos,
+		    read_descriptor_t *desc,
+		    read_actor_t actor)
+{
+	struct inode *inode = mapping->host;
+	unsigned long index, offset, last_index;
+	unsigned long nr, ret;
+	loff_t isize;
+
+	BUG_ON(!mapping->a_ops->get_xip_page);
+
+	index = *ppos >> PAGE_CACHE_SHIFT;
+	offset = *ppos & ~PAGE_CACHE_MASK;
+
+	isize = i_size_read(inode);
+	if (!isize)
+		goto out;
+	last_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+
+	do {
+		struct page *page;
+
+		/* nr is the maximum number of bytes to copy from this page */
+		if (index > last_index)
+			goto out;
+		nr = PAGE_CACHE_SIZE;
+		if (index == last_index) {
+			/* the last page may hold fewer valid bytes */
+			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+			if (nr <= offset)
+				goto out;
+		}
+		nr -= offset;
+
+		page = mapping->a_ops->get_xip_page(mapping,
+					index*(PAGE_SIZE/512), 0);
+		if (!page) {
+			/* Did not get the page. Report it */
+			desc->error = -EIO;
+			goto out;
+		}
+		if (unlikely(IS_ERR(page))) {
+			if (PTR_ERR(page) != -ENODATA) {
+				desc->error = PTR_ERR(page);
+				goto out;
+			}
+			/* sparse */
+			page = virt_to_page(empty_zero_page);
+		} else {
+			BUG_ON(!PageUptodate(page));
+		}
+
+		/*
+		 * If users can be writing to this page using arbitrary
+		 * virtual addresses, take care of potential aliasing
+		 * before reading the page on the kernel side.
+		 */
+		if (mapping_writably_mapped(mapping))
+			flush_dcache_page(page);
+
+		/*
+		 * The page is up-to-date, so hand it to the actor.  The
+		 * actor returns how many bytes it actually used, which need
+		 * not equal the amount of user buffer filled (it may pad),
+		 * so only the file position is updated here; the actor
+		 * maintains the user buffer pointer and remaining count.
+		 */
+		ret = actor(desc, page, offset, nr);
+		offset += ret;
+		index += offset >> PAGE_CACHE_SHIFT;
+		offset &= ~PAGE_CACHE_MASK;
+
+		/* continue only after a full copy while desc->count is nonzero */
+	} while (ret == nr && desc->count);
+
+out:
+	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
+	if (filp)
+		file_accessed(filp);
+}
+
+EXPORT_SYMBOL(do_xip_mapping_read);
+
+
+/*
+ * __filemap_xip_unmap is invoked from filemap_xip_nopage and
+ * generic_file_xip_write after a new block has been allocated at pgoff.
+ *
+ * This function walks all vmas of the address_space and unmaps the
+ * empty_zero_page when found at pgoff. Should it go in rmap.c?
+ */
+void
+__filemap_xip_unmap (struct address_space * mapping,
+		     unsigned long pgoff)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	struct prio_tree_iter iter;
+	unsigned long address;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	pte_t pteval;
+
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		mm = vma->vm_mm;
+		address = vma->vm_start +
+			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+		/*
+		 * We need the page_table_lock to protect us from page faults,
+		 * munmap, fork, etc...
+		 */
+		spin_lock(&mm->page_table_lock);
+		pgd = pgd_offset(mm, address);
+		if (!pgd_present(*pgd))
+			goto next_unlock;
+		pud = pud_offset(pgd, address);
+		if (!pud_present(*pud))
+			goto next_unlock;
+		pmd = pmd_offset(pud, address);
+		if (!pmd_present(*pmd))
+			goto next_unlock;
+
+		pte = pte_offset_map(pmd, address);
+		if (!pte_present(*pte))
+			goto next_unmap;
+		if ((page_to_pfn(virt_to_page(empty_zero_page)))
+				!= pte_pfn(*pte))
+			/* pte does already reference new xip block here */
+			goto next_unmap;
+		/* Nuke the page table entry. */
+		flush_cache_page(vma, address, pte_pfn(*pte));
+		pteval = ptep_clear_flush(vma, address, pte);
+		BUG_ON(pte_dirty(pteval));
+	next_unmap:
+		pte_unmap(pte);
+	next_unlock:
+		spin_unlock(&mm->page_table_lock);
+	}
+	spin_unlock(&mapping->i_mmap_lock);
+}
+
+
+/*
+ * filemap_xip_nopage() is the nopage handler in xip_file_vm_ops, derived from
+ * filemap_nopage.  It returns the page from get_xip_page(), the
+ * empty_zero_page for a hole that is not allocated here, or NULL when the
+ * offset lies beyond i_size or no page could be obtained.
+ */
+struct page *
+filemap_xip_nopage(struct vm_area_struct * area,
+		   unsigned long address,
+		   int *type)
+{
+	struct file *file = area->vm_file;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct page *page;
+	unsigned long size, pgoff;
+
+	pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
+		+ area->vm_pgoff;
+
+	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	if (pgoff >= size)
+		return NULL;
+
+	page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
+	/* a NULL page means "no page", as in do_xip_mapping_read() */
+	if (!page)
+		return NULL;
+	if (!IS_ERR(page)) {
+		BUG_ON(!PageUptodate(page));
+		return page;
+	}
+	if (PTR_ERR(page) != -ENODATA)
+		return NULL;
+
+	/* sparse block */
+	if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
+	    (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) &&
+	    (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
+		/* maybe shared writable, allocate new block */
+		page = mapping->a_ops->get_xip_page (mapping,
+					pgoff*(PAGE_SIZE/512), 1);
+		if (!page || IS_ERR(page))
+			return NULL;
+		BUG_ON(!PageUptodate(page));
+		/* unmap page at pgoff from all other vmas */
+		__filemap_xip_unmap(mapping, pgoff);
+	} else {
+		/* not shared and writable, use empty_zero_page */
+		page = virt_to_page(empty_zero_page);
+	}
+
+	return page;
+}
+
+
+/*
+ * Write to an execute in place file by copying user data directly into the
+ * pages returned by get_xip_page().  A block reported as -ENODATA is first
+ * allocated (create = 1) and its empty_zero_page mappings are removed.
+ * Updates *ppos and, if the file grew, i_size.  Returns the number of bytes
+ * written, or a negative errno if nothing was written.
+ */
+ssize_t
+generic_file_xip_write(struct kiocb *iocb, const struct iovec *iov,
+		       unsigned long nr_segs, loff_t pos, loff_t *ppos,
+		       size_t count)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space * mapping = file->f_mapping;
+	struct address_space_operations *a_ops = mapping->a_ops;
+	struct inode *inode = mapping->host;
+	long status = 0;
+	struct page *page;
+	size_t bytes;
+	const struct iovec *cur_iov = iov; /* current iovec */
+	size_t iov_base = 0; /* offset in the current iovec */
+	char __user *buf;
+	ssize_t written = 0;
+
+	BUG_ON(!mapping->a_ops->get_xip_page);
+
+	buf = iov->iov_base;
+	do {
+		unsigned long index, offset;
+		size_t copied;
+
+		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+		index = pos >> PAGE_CACHE_SHIFT;
+		bytes = PAGE_CACHE_SIZE - offset;
+		if (bytes > count)
+			bytes = count;
+
+		/*
+		 * Bring in the user page that we will copy from _first_, to
+		 * avoid a deadlock when it is the page being written to.
+		 */
+		fault_in_pages_readable(buf, bytes);
+
+		page = a_ops->get_xip_page(mapping, index*(PAGE_SIZE/512), 0);
+		if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) {
+			/* hole: allocate a new block for this page */
+			page = a_ops->get_xip_page(mapping,
+						   index*(PAGE_SIZE/512), 1);
+			if (page && !IS_ERR(page))
+				/* unmap page at pgoff from all other vmas */
+				__filemap_xip_unmap(mapping, index);
+		}
+
+		/* NULL means no page; report it as the read path does */
+		if (!page || IS_ERR(page)) {
+			status = page ? PTR_ERR(page) : -EIO;
+			break;
+		}
+
+		BUG_ON(!PageUptodate(page));
+
+		if (likely(nr_segs == 1))
+			copied = filemap_copy_from_user(page, offset,
+							buf, bytes);
+		else
+			copied = filemap_copy_from_user_iovec(page, offset,
+						cur_iov, iov_base, bytes);
+		flush_dcache_page(page);
+		if (likely(copied > 0)) {
+			status = copied;
+			written += status;
+			count -= status;
+			pos += status;
+			buf += status;
+			if (unlikely(nr_segs > 1)) {
+				filemap_set_next_iovec(&cur_iov,
+						&iov_base, status);
+				/* re-aim buf at the current segment */
+				if (count)
+					buf = cur_iov->iov_base + iov_base;
+			}
+		}
+		/* a short copy is reported as -EFAULT */
+		if (unlikely(copied != bytes) && status >= 0)
+			status = -EFAULT;
+		if (status < 0)
+			break;
+	} while (count);
+	*ppos = pos;
+	/* i_size cannot change under us (we hold i_sem): no i_size_read() */
+	if (pos > inode->i_size) {
+		i_size_write(inode, pos);
+		mark_inode_dirty(inode);
+	}
+	return written ? written : status;
+}
+EXPORT_SYMBOL(generic_file_xip_write);
+
+
+/*
+ * Zero the part of an execute in place file's block that follows @from.
+ *
+ * The functionality is analogous to block_truncate_page, but the page is
+ * obtained through get_xip_page instead of the page cache.
+ *
+ * Returns 0 on success or when there is nothing to zero (@from is block
+ * aligned, or the block is a hole), and a negative errno otherwise.
+ */
+int
+xip_truncate_page(struct address_space *mapping, loff_t from)
+{
+	pgoff_t pg = from >> PAGE_CACHE_SHIFT;
+	unsigned in_page = from & (PAGE_CACHE_SIZE-1);
+	unsigned blksz = 1 << mapping->host->i_blkbits;
+	unsigned in_block = in_page & (blksz - 1);
+	struct page *page;
+	void *kaddr;
+
+	/* Block boundary? Nothing to do */
+	if (!in_block)
+		return 0;
+
+	page = mapping->a_ops->get_xip_page(mapping,
+					    pg*(PAGE_SIZE/512), 0);
+	if (!page)
+		return -ENOMEM;
+
+	if (unlikely(IS_ERR(page))) {
+		/* Hole? No need to truncate */
+		if (PTR_ERR(page) == -ENODATA)
+			return 0;
+
+		/* any other error goes back to the caller */
+		return PTR_ERR(page);
+	}
+
+	BUG_ON(!PageUptodate(page));
+
+	/* clear from the truncation point up to the end of its block */
+	kaddr = kmap_atomic(page, KM_USER0);
+	memset(kaddr + in_page, 0, blksz - in_block);
+	kunmap_atomic(kaddr, KM_USER0);
+
+	/* NOTE(review): presumably for cache coherency with mappings - confirm */
+	flush_dcache_page(page);
+
+	return 0;
+}
+EXPORT_SYMBOL(xip_truncate_page);


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/