Re: [PATCH] fuse: implement cuse mmap

From: Luca Risolia
Date: Wed Dec 16 2015 - 15:24:21 EST


I tested this patch and gave some hints to Jader when it first appeared on the libfuse mailing list some months ago.

Signed-off-by: Luca Risolia <luca.risolia@xxxxxxxxxxxxxxx>

Jader H. Silva wrote:
Implement CUSE mmap using shmem to provide the actual memory maps.
Pages must be read and written through FUSE's NOTIFY_RETRIEVE and NOTIFY_STORE API.

Signed-off-by: Jader H. Silva <jaderhs5@xxxxxxxxx>
---
fs/fuse/cuse.c | 459 +++++++++++++++++++++++++++++++++++++++++++++-
fs/fuse/dev.c | 163 +---------------
fs/fuse/fuse_i.h | 34 +++-
fs/fuse/inode.c | 166 ++++++++++++++++-
include/uapi/linux/fuse.h | 26 +++
5 files changed, 688 insertions(+), 160 deletions(-)

diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index eae2c11..7749c13 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -48,6 +48,9 @@
#include <linux/stat.h>
#include <linux/module.h>
#include <linux/uio.h>
+#include <linux/mman.h>
+#include <linux/falloc.h>
+#include <linux/shmem_fs.h>

#include "fuse_i.h"

@@ -175,6 +178,441 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
return fuse_do_ioctl(file, cmd, arg, flags);
}

+/*
+ * fuse_dmmap_region ties a server-assigned mapid to the file that backs
+ * the mapping. Regions are linked on fc->dmmap_list, looked up by mapid
+ * and reference counted.
+ */
+struct fuse_dmmap_region {
+ /* key matched by fuse_dmmap_find_locked() */
+ u64 mapid;
+ /* largest size requested so far; bounds notify store/retrieve */
+ u64 size;
+ /* backing file created with shmem_file_setup() */
+ struct file *filp;
+ /* copy of the backing file's vm_ops with open/close/fault replaced */
+ struct vm_operations_struct vm_ops;
+ /* vm_ops installed by the backing file's ->mmap() */
+ const struct vm_operations_struct *vm_original_ops;
+ /* link on fc->dmmap_list */
+ struct list_head list;
+ /* reference count; the region is freed when it drops to zero */
+ atomic_t ref;
+};
+
+/*
+ * fuse_dmmap_vm represents the result of a single mmap() call, which
+ * can be shared by multiple client vmas created by forking.
+ */
+struct fuse_dmmap_vm {
+ /* length sent in FUSE_MMAP; echoed as "size" in FUSE_MUNMAP */
+ u64 len;
+ /* file offset sent in FUSE_MMAP; echoed in FUSE_MUNMAP */
+ u64 off;
+ /* number of vmas sharing this mapping; FUSE_MUNMAP is sent at zero */
+ atomic_t open_count;
+ /* region backing this mapping; holds one reference on it */
+ struct fuse_dmmap_region *region;
+};
+
+/*
+ * Drop one reference on @fdr. When the count reaches zero the region is
+ * unlinked from the connection's list under fc->lock, then its backing
+ * file is released and the region itself is freed.
+ */
+static void fuse_dmmap_region_put(struct fuse_conn *fc,
+				  struct fuse_dmmap_region *fdr)
+{
+	/* fc->lock is taken (and held on return) only for the final put. */
+	if (!atomic_dec_and_lock(&fdr->ref, &fc->lock))
+		return;
+
+	list_del(&fdr->list);
+	spin_unlock(&fc->lock);
+
+	fput(fdr->filp);
+	kfree(fdr);
+}
+
+/*
+ * vm_ops->open hook: account for one more vma sharing this mapping, then
+ * forward to the backing file's own open hook if it has one.
+ */
+static void fuse_dmmap_vm_open(struct vm_area_struct *vma)
+{
+	struct fuse_dmmap_vm *map = vma->vm_private_data;
+	const struct vm_operations_struct *orig_ops =
+		map->region->vm_original_ops;
+
+	/* the vma was copied */
+	atomic_inc(&map->open_count);
+
+	if (orig_ops->open)
+		orig_ops->open(vma);
+}
+
+/*
+ * vm_ops->close hook. Forwards to the backing file's close hook and, once
+ * the last vma sharing this mmap() is gone, sends FUSE_MUNMAP to the
+ * server and drops the region reference held by the mapping.
+ */
+static void fuse_dmmap_vm_close(struct vm_area_struct *vma)
+{
+	struct fuse_dmmap_vm *map = vma->vm_private_data;
+	struct fuse_dmmap_region *region = map->region;
+	struct file *file = vma->vm_file;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+	struct fuse_munmap_in *arg;
+	struct fuse_req *req;
+
+	if (region->vm_original_ops->close)
+		region->vm_original_ops->close(vma);
+
+	/* Other vmas still share this mapping; nothing more to do. */
+	if (!atomic_dec_and_test(&map->open_count))
+		return;
+
+	/*
+	 * Tell the server the region has been unmapped. Failing to do so
+	 * could leak resources in the server, so use the request allocator
+	 * that cannot fail.
+	 */
+	req = fuse_get_req_nofail_nopages(fc, file);
+
+	arg = &req->misc.munmap_in;
+	arg->fh = ff->fh;
+	arg->mapid = region->mapid;
+	arg->size = map->len;
+	arg->offset = map->off;
+
+	req->in.h.opcode = FUSE_MUNMAP;
+	req->in.h.nodeid = ff->nodeid;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(*arg);
+	req->in.args[0].value = arg;
+
+	fuse_request_send(fc, req);
+	fuse_put_request(fc, req);
+
+	fuse_dmmap_region_put(fc, region);
+	kfree(map);
+}
+
+/*
+ * vm_ops->fault hook: let the backing file's fault handler service the
+ * fault. vma->vm_file is pointed at the backing file for the duration of
+ * the call and restored afterwards.
+ */
+static int fuse_dmmap_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct fuse_dmmap_vm *map = vma->vm_private_data;
+	struct fuse_dmmap_region *region = map->region;
+	struct file *saved_file = vma->vm_file;
+	int rc;
+
+	vma->vm_file = region->filp;
+	rc = region->vm_original_ops->fault(vma, vmf);
+	vma->vm_file = saved_file;
+
+	return rc;
+}
+
+/*
+ * Hooks that wrap the backing file's vm operations. cuse_mmap() copies
+ * these three function pointers into each region's private vm_ops.
+ */
+static const struct vm_operations_struct fuse_dmmap_vm_ops = {
+ .open = fuse_dmmap_vm_open,
+ .close = fuse_dmmap_vm_close,
+ .fault = fuse_dmmap_vm_fault,
+};
+
+/*
+ * Walk fc->dmmap_list for the region whose mapid equals @mapid. On a
+ * match the region's reference count is raised and the region returned;
+ * otherwise NULL. Callers in this file invoke it with fc->lock held.
+ */
+static struct fuse_dmmap_region *fuse_dmmap_find_locked(struct fuse_conn *fc,
+							u64 mapid)
+{
+	struct fuse_dmmap_region *pos;
+
+	list_for_each_entry(pos, &fc->dmmap_list, list) {
+		if (pos->mapid != mapid)
+			continue;
+
+		atomic_inc(&pos->ref);
+		return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * Locked wrapper around fuse_dmmap_find_locked(): takes fc->lock for the
+ * lookup. Returns a referenced region, or NULL when @mapid is unknown.
+ */
+static struct fuse_dmmap_region *fuse_dmmap_find(struct fuse_conn *fc,
+						 u64 mapid)
+{
+	struct fuse_dmmap_region *found;
+
+	spin_lock(&fc->lock);
+	found = fuse_dmmap_find_locked(fc, mapid);
+	spin_unlock(&fc->lock);
+
+	return found;
+}
+
+/*
+ * Look up the region identified by @mapid, creating it (with a shmem file
+ * named after @file's path) when it does not exist yet, and make sure the
+ * backing file covers at least @size bytes.
+ *
+ * @fc:    connection owning the region list
+ * @file:  CUSE file being mapped; only its path is used, as the shmem name
+ * @mapid: region identifier supplied by the server
+ * @size:  region size supplied by the server
+ * @flags: passed through to shmem_file_setup()
+ *
+ * Returns a referenced region, or an ERR_PTR() on failure. The caller
+ * drops the reference with fuse_dmmap_region_put().
+ */
+static struct fuse_dmmap_region *fuse_dmmap_get(struct fuse_conn *fc,
+						struct file *file, u64 mapid,
+						u64 size, unsigned long flags)
+{
+	struct fuse_dmmap_region *fdr;
+	long err;
+
+	fdr = fuse_dmmap_find(fc, mapid);
+	if (!fdr) {
+		struct fuse_dmmap_region *tmp;
+		struct file *shmem_file;
+		char *pathbuf, *filepath;
+
+		fdr = kzalloc(sizeof(*fdr), GFP_KERNEL);
+		if (!fdr)
+			return ERR_PTR(-ENOMEM);
+
+		atomic_set(&fdr->ref, 1);
+		fdr->mapid = mapid;
+		INIT_LIST_HEAD(&fdr->list);
+
+		pathbuf = kzalloc(PATH_MAX + 1, GFP_KERNEL);
+		if (!pathbuf) {
+			kfree(fdr);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		filepath = d_path(&file->f_path, pathbuf, PATH_MAX + 1);
+		if (IS_ERR(filepath)) {
+			kfree(pathbuf);
+			kfree(fdr);
+			return ERR_CAST(filepath);
+		}
+
+		shmem_file = shmem_file_setup(filepath, size, flags);
+		kfree(pathbuf);
+		if (IS_ERR(shmem_file)) {
+			kfree(fdr);
+			return ERR_CAST(shmem_file);
+		}
+		fdr->filp = shmem_file;
+
+		/*
+		 * Somebody may have created the same region while we were
+		 * allocating; re-check under the lock before publishing.
+		 */
+		spin_lock(&fc->lock);
+		tmp = fuse_dmmap_find_locked(fc, mapid);
+		if (!tmp)
+			list_add(&fdr->list, &fc->dmmap_list);
+		spin_unlock(&fc->lock);
+
+		if (tmp) {
+			/* Lost the race: discard ours, use the winner. */
+			fput(fdr->filp);
+			kfree(fdr);
+			fdr = tmp;
+		}
+	}
+
+	if (size > fdr->size) {
+		/*
+		 * Grow the backing file. A failure must not be ignored,
+		 * otherwise fdr->size would claim space that was never
+		 * allocated.
+		 */
+		err = -EOPNOTSUPP;
+		if (fdr->filp->f_op->fallocate)
+			err = fdr->filp->f_op->fallocate(fdr->filp, 0, 0,
+							 size);
+		if (err) {
+			fuse_dmmap_region_put(fc, fdr);
+			return ERR_PTR(err);
+		}
+
+		/* Concurrent growers: only ever move the size upwards. */
+		spin_lock(&fc->lock);
+		if (size > fdr->size)
+			fdr->size = size;
+		spin_unlock(&fc->lock);
+	}
+
+	return fdr;
+}
+
+/*
+ * mmap file operation for CUSE devices.
+ *
+ * Sends FUSE_MMAP to the server to ask whether the mapping is allowed and
+ * which region (mapid/size) backs it, obtains that region, lets the
+ * region's backing file set up the vma, and finally installs the region's
+ * wrapped vm_ops on the vma.
+ *
+ * Returns 0 on success or a negative errno. -ENOSYS from the server is
+ * remembered in fc->no_dmmap so later calls fail without a round trip.
+ */
+static int cuse_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int err;
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+	struct fuse_dmmap_vm *fdvm;
+	struct fuse_dmmap_region *fdr;
+	struct fuse_req *req;
+	struct fuse_mmap_in inarg;
+	struct fuse_mmap_out outarg;
+
+	if (fc->no_dmmap)
+		return -ENOSYS;
+
+	req = fuse_get_req(fc, 0);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	/* ask server whether this mmap is okay and what the size should be */
+	memset(&inarg, 0, sizeof(inarg));
+	inarg.fh = ff->fh;
+	inarg.addr = vma->vm_start;
+	inarg.len = vma->vm_end - vma->vm_start;
+	inarg.prot = ((vma->vm_flags & VM_READ) ? PROT_READ : 0) |
+		     ((vma->vm_flags & VM_WRITE) ? PROT_WRITE : 0) |
+		     ((vma->vm_flags & VM_EXEC) ? PROT_EXEC : 0);
+	inarg.flags = ((vma->vm_flags & VM_SHARED) ? MAP_SHARED : 0) |
+		      ((vma->vm_flags & VM_GROWSDOWN) ? MAP_GROWSDOWN : 0) |
+		      ((vma->vm_flags & VM_DENYWRITE) ? MAP_DENYWRITE : 0) |
+		      ((vma->vm_flags & VM_EXEC) ? MAP_EXECUTABLE : 0) |
+		      ((vma->vm_flags & VM_LOCKED) ? MAP_LOCKED : 0);
+	inarg.offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
+
+	req->in.h.opcode = FUSE_MMAP;
+	req->in.h.nodeid = ff->nodeid;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	req->out.numargs = 1;
+	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].value = &outarg;
+
+	fuse_request_send(fc, req);
+	err = req->out.h.error;
+	if (err) {
+		if (err == -ENOSYS)
+			fc->no_dmmap = 1;
+		goto free_req;
+	}
+
+	fdr = fuse_dmmap_get(fc, file, outarg.mapid, outarg.size,
+			     vma->vm_flags);
+	if (IS_ERR(fdr)) {
+		err = PTR_ERR(fdr);
+		goto free_req;
+	}
+
+	fdvm = kzalloc(sizeof(*fdvm), GFP_KERNEL);
+	if (!fdvm) {
+		err = -ENOMEM;
+		goto put_region;
+	}
+
+	atomic_set(&fdvm->open_count, 1);
+	fdvm->region = fdr;
+	fdvm->len = inarg.len;
+	fdvm->off = inarg.offset;
+
+	/*
+	 * Let the backing file set up the vma. Its result must be checked:
+	 * on failure vma->vm_ops is not something we may wrap, and the
+	 * mapping must not be reported as established.
+	 */
+	err = fdr->filp->f_op->mmap(fdr->filp, vma);
+	if (err)
+		goto free_fdvm;
+
+	/* Wrap the backing file's vm_ops with our open/close/fault hooks. */
+	memcpy(&fdr->vm_ops, vma->vm_ops, sizeof(fdr->vm_ops));
+	fdr->vm_ops.open = fuse_dmmap_vm_ops.open;
+	fdr->vm_ops.close = fuse_dmmap_vm_ops.close;
+	fdr->vm_ops.fault = fuse_dmmap_vm_ops.fault;
+
+	fdr->vm_original_ops = vma->vm_ops;
+
+	vma->vm_ops = &fdr->vm_ops;
+
+	vma->vm_private_data = fdvm;
+	vma->vm_flags |= VM_DONTEXPAND;	/* disallow expansion for now */
+
+	fuse_put_request(fc, req);
+	return 0;
+
+	/*
+	 * NOTE(review): on the error paths below the server has already
+	 * accepted FUSE_MMAP but is never sent a matching FUSE_MUNMAP;
+	 * confirm whether the protocol expects one here.
+	 */
+free_fdvm:
+	kfree(fdvm);
+put_region:
+	fuse_dmmap_region_put(fc, fdr);
+free_req:
+	fuse_put_request(fc, req);
+	return err;
+}
+
+/*
+ * NOTIFY_STORE handler for CUSE: copy @size bytes from the device write
+ * described by @cs into the mmap region identified by @nodeid, starting at
+ * byte offset @pos. The range is clipped to the region's size.
+ *
+ * Returns 0 on success, -ENOENT when no region has that id, or the error
+ * from obtaining or copying a page.
+ */
+static int fuse_notify_store_to_dmmap(struct fuse_conn *fc,
+				      struct fuse_copy_state *cs,
+				      u64 nodeid, u32 size, u64 pos)
+{
+	struct fuse_dmmap_region *fdr;
+	struct address_space *mapping;
+	pgoff_t index;
+	unsigned int off;
+	int err = 0;
+
+	fdr = fuse_dmmap_find(fc, nodeid);
+	if (!fdr)
+		return -ENOENT;
+
+	index = pos >> PAGE_SHIFT;
+	off = pos & ~PAGE_MASK;
+	if (pos > fdr->size)
+		size = 0;
+	else if (size > fdr->size - pos)
+		size = fdr->size - pos;
+
+	mapping = fdr->filp->f_inode->i_mapping;
+
+	while (size) {
+		struct page *page;
+		unsigned int this_num;
+
+		page = shmem_read_mapping_page_gfp(mapping, index,
+						   GFP_HIGHUSER);
+		if (IS_ERR(page)) {
+			/* report the real cause, not a blanket -ENOMEM */
+			err = PTR_ERR(page);
+			break;
+		}
+
+		/*
+		 * shmem_read_mapping_page_gfp() hands back the page
+		 * unlocked. Lock it for the copy so the unlock_page()
+		 * below is balanced.
+		 */
+		lock_page(page);
+
+		this_num = min_t(unsigned, size, PAGE_SIZE - off);
+		err = fuse_copy_page(cs, &page, off, this_num, 0);
+
+		unlock_page(page);
+		page_cache_release(page);
+
+		if (err)
+			break;
+
+		size -= this_num;
+		off = 0;
+		index++;
+	}
+
+	fuse_dmmap_region_put(fc, fdr);
+
+	return err;
+}
+
+/*
+ * Request end callback for dmmap NOTIFY_RETRIEVE replies: releases the
+ * pages attached to @req. @fc is unused.
+ */
+static void fuse_retrieve_dmmap_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ release_pages(req->pages, req->num_pages, 0);
+}
+
+/*
+ * NOTIFY_RETRIEVE handler for CUSE: gather the pages of the mmap region
+ * identified by outarg->nodeid that cover [offset, offset + size), clipped
+ * to the region size and to FUSE_MAX_PAGES_PER_REQ pages, and send them
+ * back to the server as a FUSE_NOTIFY_REPLY.
+ *
+ * Returns 0 on success, -ENOENT when no region has that id, or the error
+ * from request allocation, page lookup or queueing the reply.
+ */
+static int fuse_notify_retrieve_from_dmmap(struct fuse_conn *fc,
+					    struct fuse_notify_retrieve_out *outarg)
+{
+	struct fuse_dmmap_region *fdr;
+	struct address_space *mapping;
+	struct fuse_req *req;
+	struct page *page;
+	pgoff_t index;
+	unsigned int num;
+	unsigned int offset;
+	unsigned int npages;
+	unsigned int this_num;
+	size_t total_len = 0;
+	int err;
+
+	fdr = fuse_dmmap_find(fc, outarg->nodeid);
+	if (!fdr)
+		return -ENOENT;
+
+	offset = outarg->offset & ~PAGE_MASK;
+	index = outarg->offset >> PAGE_SHIFT;
+
+	/* Clip the request to the region. */
+	num = outarg->size;
+	if (outarg->offset > fdr->size)
+		num = 0;
+	else if (num > fdr->size - outarg->offset)
+		num = fdr->size - outarg->offset;
+
+	/*
+	 * Size the page array from the clipped length AND the offset into
+	 * the first page (an unaligned range touches one extra page), and
+	 * never ask for more than one request can carry. The arithmetic is
+	 * done in 64 bits so that num + offset cannot wrap.
+	 */
+	npages = min_t(u64, ((u64)num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT,
+		       FUSE_MAX_PAGES_PER_REQ);
+
+	req = fuse_get_req(fc, npages);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto out_put_region;
+	}
+
+	req->in.h.opcode = FUSE_NOTIFY_REPLY;
+	req->in.h.nodeid = outarg->nodeid;
+	req->in.numargs = 2;
+	req->in.argpages = 1;
+	req->page_descs[0].offset = offset;
+	req->end = fuse_retrieve_dmmap_end;
+
+	mapping = fdr->filp->f_inode->i_mapping;
+
+	/* Bounded by the allocated page count, not the global maximum. */
+	while (num && req->num_pages < npages) {
+		page = shmem_read_mapping_page_gfp(mapping, index, GFP_KERNEL);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			goto out_release;
+		}
+
+		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
+		req->pages[req->num_pages] = page;
+		req->page_descs[req->num_pages].length = this_num;
+		req->num_pages++;
+
+		/* only the first page starts mid-page */
+		offset = 0;
+		num -= this_num;
+		total_len += this_num;
+		index++;
+	}
+	req->misc.retrieve_in.offset = outarg->offset;
+	req->misc.retrieve_in.size = total_len;
+	req->in.args[0].size = sizeof(req->misc.retrieve_in);
+	req->in.args[0].value = &req->misc.retrieve_in;
+	req->in.args[1].size = total_len;
+
+	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
+	if (!err)
+		goto out_put_region;
+
+out_release:
+	/* The request was never queued: drop its pages and the request. */
+	fuse_retrieve_dmmap_end(fc, req);
+	fuse_put_request(fc, req);
+out_put_region:
+	fuse_dmmap_region_put(fc, fdr);
+
+	return err;
+}
+
+
static const struct file_operations cuse_frontend_fops = {
.owner = THIS_MODULE,
.read_iter = cuse_read_iter,
@@ -184,7 +622,8 @@ static const struct file_operations cuse_frontend_fops = {
.unlocked_ioctl = cuse_file_ioctl,
.compat_ioctl = cuse_file_compat_ioctl,
.poll = fuse_file_poll,
- .llseek = noop_llseek,
+ .llseek = noop_llseek,
+ .mmap = cuse_mmap,
};


@@ -468,10 +907,26 @@ err:

static void cuse_fc_release(struct fuse_conn *fc)
{
+ struct fuse_dmmap_region *fdr;
struct cuse_conn *cc = fc_to_cc(fc);
+
+ spin_lock(&fc->lock);
+ while (!list_empty(&fc->dmmap_list)) {
+
+ fdr = list_entry(fc->dmmap_list.next, typeof(*fdr), list);
+ fuse_dmmap_region_put(fc, fdr);
+ }
+ spin_unlock(&fc->lock);
+
kfree_rcu(cc, fc.rcu);
}

+/*
+ * Connection operations for CUSE: NOTIFY_STORE/NOTIFY_RETRIEVE act on
+ * dmmap regions rather than on inodes.
+ */
+static const struct fuse_conn_operations cuse_ops = {
+ .release = cuse_fc_release,
+ .notify_store = fuse_notify_store_to_dmmap,
+ .notify_retrieve = fuse_notify_retrieve_from_dmmap,
+};
+
/**
* cuse_channel_open - open method for /dev/cuse
* @inode: inode for /dev/cuse
@@ -507,7 +962,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
}

INIT_LIST_HEAD(&cc->list);
- cc->fc.release = cuse_fc_release;
+ cc->fc.ops = &cuse_ops;

cc->fc.initialized = 1;
rc = cuse_send_init(cc);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 80cc1b3..0faf92c 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -279,6 +279,7 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
__clear_bit(FR_BACKGROUND, &req->flags);
return req;
}
+EXPORT_SYMBOL_GPL(fuse_get_req_nofail_nopages);

void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
@@ -617,8 +618,8 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);

-static int fuse_request_send_notify_reply(struct fuse_conn *fc,
- struct fuse_req *req, u64 unique)
+int fuse_request_send_notify_reply(struct fuse_conn *fc,
+ struct fuse_req *req, u64 unique)
{
int err = -ENODEV;
struct fuse_iqueue *fiq = &fc->iq;
@@ -674,6 +675,7 @@ static int lock_request(struct fuse_req *req)
}
return err;
}
+EXPORT_SYMBOL_GPL(fuse_request_send_notify_reply);

/*
* Unlock request. If it was aborted while locked, caller is responsible
@@ -967,8 +969,8 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
* Copy a page in the request to/from the userspace buffer. Must be
* done atomically
*/
-static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
- unsigned offset, unsigned count, int zeroing)
+int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
+ unsigned offset, unsigned count, int zeroing)
{
int err;
struct page *page = *pagep;
@@ -1003,6 +1005,7 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
flush_dcache_page(page);
return 0;
}
+EXPORT_SYMBOL_GPL(fuse_copy_page);

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
@@ -1597,15 +1600,7 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_store_out outarg;
- struct inode *inode;
- struct address_space *mapping;
- u64 nodeid;
int err;
- pgoff_t index;
- unsigned int offset;
- unsigned int num;
- loff_t file_size;
- loff_t end;

err = -EINVAL;
if (size < sizeof(outarg))
@@ -1619,145 +1614,18 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
if (size - sizeof(outarg) != outarg.size)
goto out_finish;

- nodeid = outarg.nodeid;
+ err = fc->ops->notify_store(fc, cs, outarg.nodeid, outarg.size,
+ outarg.offset);

- down_read(&fc->killsb);
-
- err = -ENOENT;
- if (!fc->sb)
- goto out_up_killsb;
-
- inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
- if (!inode)
- goto out_up_killsb;
-
- mapping = inode->i_mapping;
- index = outarg.offset >> PAGE_CACHE_SHIFT;
- offset = outarg.offset & ~PAGE_CACHE_MASK;
- file_size = i_size_read(inode);
- end = outarg.offset + outarg.size;
- if (end > file_size) {
- file_size = end;
- fuse_write_update_size(inode, file_size);
- }
-
- num = outarg.size;
- while (num) {
- struct page *page;
- unsigned int this_num;
-
- err = -ENOMEM;
- page = find_or_create_page(mapping, index,
- mapping_gfp_mask(mapping));
- if (!page)
- goto out_iput;
-
- this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
- err = fuse_copy_page(cs, &page, offset, this_num, 0);
- if (!err && offset == 0 &&
- (this_num == PAGE_CACHE_SIZE || file_size == end))
- SetPageUptodate(page);
- unlock_page(page);
- page_cache_release(page);
-
- if (err)
- goto out_iput;
-
- num -= this_num;
- offset = 0;
- index++;
- }
-
- err = 0;
-
-out_iput:
- iput(inode);
-out_up_killsb:
- up_read(&fc->killsb);
out_finish:
fuse_copy_finish(cs);
return err;
}

-static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
-{
- release_pages(req->pages, req->num_pages, false);
-}
-
-static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
- struct fuse_notify_retrieve_out *outarg)
-{
- int err;
- struct address_space *mapping = inode->i_mapping;
- struct fuse_req *req;
- pgoff_t index;
- loff_t file_size;
- unsigned int num;
- unsigned int offset;
- size_t total_len = 0;
- int num_pages;
-
- offset = outarg->offset & ~PAGE_CACHE_MASK;
- file_size = i_size_read(inode);
-
- num = outarg->size;
- if (outarg->offset > file_size)
- num = 0;
- else if (outarg->offset + num > file_size)
- num = file_size - outarg->offset;
-
- num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
- num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
-
- req = fuse_get_req(fc, num_pages);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->in.h.opcode = FUSE_NOTIFY_REPLY;
- req->in.h.nodeid = outarg->nodeid;
- req->in.numargs = 2;
- req->in.argpages = 1;
- req->page_descs[0].offset = offset;
- req->end = fuse_retrieve_end;
-
- index = outarg->offset >> PAGE_CACHE_SHIFT;
-
- while (num && req->num_pages < num_pages) {
- struct page *page;
- unsigned int this_num;
-
- page = find_get_page(mapping, index);
- if (!page)
- break;
-
- this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
- req->pages[req->num_pages] = page;
- req->page_descs[req->num_pages].length = this_num;
- req->num_pages++;
-
- offset = 0;
- num -= this_num;
- total_len += this_num;
- index++;
- }
- req->misc.retrieve_in.offset = outarg->offset;
- req->misc.retrieve_in.size = total_len;
- req->in.args[0].size = sizeof(req->misc.retrieve_in);
- req->in.args[0].value = &req->misc.retrieve_in;
- req->in.args[1].size = total_len;
-
- err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
- if (err)
- fuse_retrieve_end(fc, req);
-
- return err;
-}
-
static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
struct fuse_notify_retrieve_out outarg;
- struct inode *inode;
int err;

err = -EINVAL;
@@ -1770,18 +1638,7 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,

fuse_copy_finish(cs);

- down_read(&fc->killsb);
- err = -ENOENT;
- if (fc->sb) {
- u64 nodeid = outarg.nodeid;
-
- inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
- if (inode) {
- err = fuse_retrieve(fc, inode, &outarg);
- iput(inode);
- }
- }
- up_read(&fc->killsb);
+ err = fc->ops->notify_retrieve(fc, &outarg);

return err;

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 4051131..a56222b 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -337,6 +337,7 @@ struct fuse_req {
struct fuse_req *next;
} write;
struct fuse_notify_retrieve_in retrieve_in;
+ struct fuse_munmap_in munmap_in;
} misc;

/** page vector */
@@ -431,6 +432,21 @@ struct fuse_dev {
struct list_head entry;
};

+struct fuse_copy_state;
+
+/*
+ * Per-connection hooks. fuse_fill_super() installs the inode-based
+ * implementation and cuse_channel_open() the dmmap-based one.
+ */
+struct fuse_conn_operations {
+ /** Called on final put */
+ void (*release)(struct fuse_conn *);
+
+ /**
+ * Called to store data into a mapping: copy size bytes from the
+ * copy state to byte offset pos of the object identified by nodeid
+ */
+ int (*notify_store)(struct fuse_conn *, struct fuse_copy_state *,
+ u64 nodeid, u32 size, u64 pos);
+
+ /** Called to retrieve data from a mapping */
+ int (*notify_retrieve)(struct fuse_conn *,
+ struct fuse_notify_retrieve_out *);
+};
+
/**
* A Fuse connection.
*
@@ -578,6 +594,9 @@ struct fuse_conn {
/** Is poll not implemented by fs? */
unsigned no_poll:1;

+ /** Is direct mmap not implemented by fs? */
+ unsigned no_dmmap:1;
+
/** Do multi-page cached writes */
unsigned big_writes:1;

@@ -635,9 +654,6 @@ struct fuse_conn {
/** Version counter for attribute changes */
u64 attr_version;

- /** Called on final put */
- void (*release)(struct fuse_conn *);
-
/** Super block for this connection. */
struct super_block *sb;

@@ -646,6 +662,12 @@ struct fuse_conn {

/** List of device instances belonging to this connection */
struct list_head devices;
+
+ /** List of direct mmaps (currently CUSE only) */
+ struct list_head dmmap_list;
+
+ /** Operations that fuse and cuse can implement differently */
+ const struct fuse_conn_operations *ops;
};

static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
@@ -944,4 +966,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,

void fuse_set_initialized(struct fuse_conn *fc);

+int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
+ unsigned offset, unsigned count, int zeroing);
+
+int fuse_request_send_notify_reply(struct fuse_conn *fc,
+ struct fuse_req *req, u64 unique);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ac81f48..5284b84 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -609,6 +609,7 @@ void fuse_conn_init(struct fuse_conn *fc)
fc->connected = 1;
fc->attr_version = 1;
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
+ INIT_LIST_HEAD(&fc->dmmap_list);
}
EXPORT_SYMBOL_GPL(fuse_conn_init);

@@ -617,7 +618,7 @@ void fuse_conn_put(struct fuse_conn *fc)
if (atomic_dec_and_test(&fc->count)) {
if (fc->destroy_req)
fuse_request_free(fc->destroy_req);
- fc->release(fc);
+ fc->ops->release(fc);
}
}
EXPORT_SYMBOL_GPL(fuse_conn_put);
@@ -1025,6 +1026,167 @@ void fuse_dev_free(struct fuse_dev *fud)
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

+/*
+ * NOTIFY_STORE handler for regular FUSE mounts: copy @size bytes from the
+ * device write described by @cs into the page cache of the inode
+ * identified by @nodeid, starting at byte offset @pos. The inode size is
+ * extended when the store reaches past it.
+ *
+ * Returns 0 on success, -ENOENT when there is no superblock or no such
+ * inode, -ENOMEM when a page cannot be obtained, or the error from
+ * fuse_copy_page().
+ */
+static int fuse_notify_store_to_inode(struct fuse_conn *fc,
+				      struct fuse_copy_state *cs,
+				      u64 nodeid, u32 size, u64 pos)
+{
+	struct inode *inode;
+	struct address_space *mapping;
+	pgoff_t index;
+	unsigned int off;
+	loff_t file_size;
+	loff_t end;
+	int err;
+
+	down_read(&fc->killsb);
+
+	err = -ENOENT;
+	if (!fc->sb)
+		goto out_up_killsb;
+
+	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+	if (!inode)
+		goto out_up_killsb;
+
+	mapping = inode->i_mapping;
+	index = pos >> PAGE_CACHE_SHIFT;
+	off = pos & ~PAGE_CACHE_MASK;
+	file_size = i_size_read(inode);
+	end = pos + size;
+	if (end > file_size) {
+		file_size = end;
+		fuse_write_update_size(inode, file_size);
+	}
+
+	while (size) {
+		struct page *page;
+		unsigned int this_num;
+
+		err = -ENOMEM;
+		page = find_or_create_page(mapping, index,
+					   mapping_gfp_mask(mapping));
+		if (!page)
+			goto out_iput;
+
+		this_num = min_t(unsigned, size, PAGE_CACHE_SIZE - off);
+		err = fuse_copy_page(cs, &page, off, this_num, 0);
+		/*
+		 * Only a page that was written in full, or whose tail lies
+		 * beyond the (new) end of file, is completely valid. A
+		 * partial write must not mark the page up to date.
+		 */
+		if (!err && off == 0 &&
+		    (this_num == PAGE_CACHE_SIZE || file_size == end))
+			SetPageUptodate(page);
+		unlock_page(page);
+		page_cache_release(page);
+
+		if (err)
+			goto out_iput;
+
+		size -= this_num;
+		off = 0;
+		index++;
+	}
+
+	err = 0;
+
+out_iput:
+	iput(inode);
+out_up_killsb:
+	up_read(&fc->killsb);
+
+	return err;
+}
+
+/*
+ * Request end callback for inode NOTIFY_RETRIEVE replies: releases the
+ * pages attached to @req. @fc is unused.
+ */
+static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ release_pages(req->pages, req->num_pages, 0);
+}
+
+/*
+ * Collect the cached pages of @inode covering the range requested in
+ * @outarg (clipped to the file size and to FUSE_MAX_PAGES_PER_REQ pages)
+ * and send them to the server as a FUSE_NOTIFY_REPLY. Collection stops at
+ * the first page that is not in the page cache.
+ *
+ * Returns 0 on success or the error from request allocation or from
+ * queueing the reply.
+ */
+static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
+			 struct fuse_notify_retrieve_out *outarg)
+{
+	int err;
+	struct address_space *mapping = inode->i_mapping;
+	struct fuse_req *req;
+	pgoff_t index;
+	loff_t file_size;
+	unsigned int num;
+	unsigned int offset;
+	size_t total_len = 0;
+	int num_pages;
+
+	offset = outarg->offset & ~PAGE_CACHE_MASK;
+	file_size = i_size_read(inode);
+
+	num = outarg->size;
+	if (outarg->offset > file_size)
+		num = 0;
+	else if (outarg->offset + num > file_size)
+		num = file_size - outarg->offset;
+
+	/*
+	 * The request must be allocated with room for every page stored
+	 * below; asking for zero pages and then filling req->pages[] writes
+	 * past the end of the array.
+	 */
+	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
+
+	req = fuse_get_req(fc, num_pages);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->in.h.opcode = FUSE_NOTIFY_REPLY;
+	req->in.h.nodeid = outarg->nodeid;
+	req->in.numargs = 2;
+	req->in.argpages = 1;
+	req->page_descs[0].offset = offset;
+	req->end = fuse_retrieve_end;
+
+	index = outarg->offset >> PAGE_CACHE_SHIFT;
+
+	while (num && req->num_pages < num_pages) {
+		struct page *page;
+		unsigned int this_num;
+
+		page = find_get_page(mapping, index);
+		if (!page)
+			break;
+
+		this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+		req->pages[req->num_pages] = page;
+		req->page_descs[req->num_pages].length = this_num;
+		req->num_pages++;
+
+		/* only the first page starts mid-page */
+		offset = 0;
+		num -= this_num;
+		total_len += this_num;
+		index++;
+	}
+	req->misc.retrieve_in.offset = outarg->offset;
+	req->misc.retrieve_in.size = total_len;
+	req->in.args[0].size = sizeof(req->misc.retrieve_in);
+	req->in.args[0].value = &req->misc.retrieve_in;
+	req->in.args[1].size = total_len;
+
+	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
+	if (err) {
+		/* Never queued: drop the pages and the request itself. */
+		fuse_retrieve_end(fc, req);
+		fuse_put_request(fc, req);
+	}
+
+	return err;
+}
+
+/*
+ * NOTIFY_RETRIEVE handler for regular FUSE mounts: look up the inode named
+ * by outarg->nodeid and hand it to fuse_retrieve(). Returns -ENOENT when
+ * the connection has no superblock or the inode is not found.
+ */
+static int fuse_notify_retrieve_from_inode(struct fuse_conn *fc,
+					    struct fuse_notify_retrieve_out *outarg)
+{
+	u64 nodeid = outarg->nodeid;
+	struct inode *inode;
+	int err = -ENOENT;
+
+	down_read(&fc->killsb);
+	if (!fc->sb)
+		goto out_unlock;
+
+	inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
+	if (!inode)
+		goto out_unlock;
+
+	err = fuse_retrieve(fc, inode, outarg);
+	iput(inode);
+
+out_unlock:
+	up_read(&fc->killsb);
+
+	return err;
+}
+
+/*
+ * Connection operations for regular FUSE mounts: NOTIFY_STORE and
+ * NOTIFY_RETRIEVE act on the page cache of the addressed inode.
+ */
+static const struct fuse_conn_operations fuse_default_ops = {
+ .release = fuse_free_conn,
+ .notify_store = fuse_notify_store_to_inode,
+ .notify_retrieve = fuse_notify_retrieve_from_inode,
+};
+
static int fuse_fill_super(struct super_block *sb, void *data, int silent)
{
struct fuse_dev *fud;
@@ -1077,7 +1239,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
goto err_fput;

fuse_conn_init(fc);
- fc->release = fuse_free_conn;
+ fc->ops = &fuse_default_ops;

fud = fuse_dev_alloc(fc);
if (!fud)
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index c9aca04..3f4c54b 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -102,6 +102,7 @@
* - add ctime and ctimensec to fuse_setattr_in
* - add FUSE_RENAME2 request
* - add FUSE_NO_OPEN_SUPPORT flag
+ * - add FUSE_MMAP and FUSE_MUNMAP
*/

#ifndef _LINUX_FUSE_H
@@ -358,6 +359,8 @@ enum fuse_opcode {
FUSE_FALLOCATE = 43,
FUSE_READDIRPLUS = 44,
FUSE_RENAME2 = 45,
+ FUSE_MMAP = 46,
+ FUSE_MUNMAP = 47,

/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -670,6 +673,29 @@ struct fuse_fallocate_in {
uint32_t padding;
};

+/* Request body of FUSE_MMAP, filled in by cuse_mmap() from the vma */
+struct fuse_mmap_in {
+ __u64 fh; /* file handle of the open file */
+ __u64 addr; /* start address of the vma */
+ __u64 len; /* length of the vma in bytes */
+ __u32 prot; /* PROT_* bits derived from the vma flags */
+ __u32 flags; /* MAP_* bits derived from the vma flags */
+ __u64 offset; /* file offset of the mapping in bytes */
+};
+
+/* Reply body of FUSE_MMAP, filled in by the server */
+struct fuse_mmap_out {
+ __u64 mapid; /* Mmap ID, same namespace as Inode ID */
+ __u64 size; /* Size of memory region */
+ __u64 reserved;
+};
+
+/* Request body of FUSE_MUNMAP, sent when the last vma of a mapping closes */
+struct fuse_munmap_in {
+ __u64 fh; /* file handle of the open file */
+ __u64 mapid; /* Mmap ID returned by FUSE_MMAP */
+ __u64 size; /* Size of memory region */
+ __u64 offset; /* file offset that was passed to FUSE_MMAP */
+ __u64 reserved;
+};
+
struct fuse_in_header {
uint32_t len;
uint32_t opcode;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/