[PATCH RFC v15 21/30] virt: gunyah: Add ioctl to bind guestmem to VMs

From: Elliot Berman
Date: Fri Dec 15 2023 - 19:27:07 EST


A maple tree is used to maintain a map from guest address ranges to the
guestmemfd that provides the backing memory for each range. The mapping
of a guest address range to a guestmemfd is called a binding. Implement
an ioctl to add bindings to, and remove bindings from, a virtual
machine. The binding determines whether the memory is shared (host
retains access) or lent (host loses access).

Signed-off-by: Elliot Berman <quic_eberman@xxxxxxxxxxx>
---
drivers/virt/gunyah/guest_memfd.c | 277 ++++++++++++++++++++++++++++++++++++++
drivers/virt/gunyah/vm_mgr.c | 15 +++
drivers/virt/gunyah/vm_mgr.h | 6 +
include/uapi/linux/gunyah.h | 41 ++++++
4 files changed, 339 insertions(+)

diff --git a/drivers/virt/gunyah/guest_memfd.c b/drivers/virt/gunyah/guest_memfd.c
index 709aae9a1f44..c38380c4dc50 100644
--- a/drivers/virt/gunyah/guest_memfd.c
+++ b/drivers/virt/gunyah/guest_memfd.c
@@ -9,11 +9,61 @@
#include <linux/types.h>
#include <linux/falloc.h>
#include <linux/file.h>
+#include <linux/maple_tree.h>
#include <linux/migrate.h>
#include <linux/pagemap.h>

#include <uapi/linux/gunyah.h>

+#include "vm_mgr.h"
+
+/**
+ * struct gunyah_gmem_binding - Represents a binding of guestmem to a Gunyah VM
+ * @gfn: Guest frame number at which the binding starts; derived from the
+ *       guest address with gunyah_gpa_to_gfn()
+ * @ghvm: Pointer to Gunyah VM in this binding
+ * @mt: Maple tree to track folios which have been provided to the VM
+ * @i_off: Offset into the guestmem, in pages, to grab folios from
+ * @inode: Pointer to guest mem inode
+ * @i_entry: list entry for inode->i_mapping->i_private_list
+ * @flags: Access flags for the binding (see &enum gunyah_map_flags)
+ * @nr: Number of pages covered by this binding
+ *
+ * A binding is also stored in the owning VM's mem_layout maple tree, where it
+ * covers the guest frame range [@gfn, @gfn + @nr - 1].
+ */
+struct gunyah_gmem_binding {
+ u64 gfn;
+ struct gunyah_vm *ghvm;
+ struct maple_tree mt;
+
+ pgoff_t i_off;
+ struct inode *inode;
+ struct list_head i_entry;
+
+ u32 flags;
+ unsigned long nr;
+};
+
+/*
+ * Convert a guest frame number covered by @b into the matching page offset
+ * within the guestmem file.
+ */
+static inline pgoff_t gunyah_gfn_to_off(struct gunyah_gmem_binding *b, u64 gfn)
+{
+	const u64 delta = gfn - b->gfn;
+
+	return delta + b->i_off;
+}
+
+/*
+ * Convert a page offset within the guestmem file covered by @b into the
+ * matching guest frame number.
+ */
+static inline u64 gunyah_off_to_gfn(struct gunyah_gmem_binding *b, pgoff_t off)
+{
+	const pgoff_t delta = off - b->i_off;
+
+	return delta + b->gfn;
+}
+
+/*
+ * Decide whether memory bound with @flags is lent to the VM (true) or shared
+ * with it (false). Only an explicit GUNYAH_MEM_FORCE_SHARE yields sharing;
+ * @ghvm is currently not consulted.
+ */
+static inline bool gunyah_guest_mem_is_lend(struct gunyah_vm *ghvm, u32 flags)
+{
+	switch (flags & GUNYAH_MEM_ACCESS_MASK) {
+	case GUNYAH_MEM_FORCE_SHARE:
+		return false;
+	case GUNYAH_MEM_FORCE_LEND:
+	default:
+		/* RM requires all VMs to be protected (isolated) */
+		return true;
+	}
+}
+
static struct folio *gunyah_gmem_get_huge_folio(struct inode *inode,
pgoff_t index)
{
@@ -191,8 +241,15 @@ static long gunyah_gmem_fallocate(struct file *file, int mode, loff_t offset,

+/*
+ * Tear down a guestmem inode's state: punch out the whole file range and
+ * drop every binding still linked on the inode's mapping. Always returns 0.
+ *
+ * NOTE(review): the binding list is walked without filemap_invalidate_lock(),
+ * which the add/remove mapping paths hold around list updates - confirm no
+ * concurrent list user can exist when this runs.
+ */
static int gunyah_gmem_release(struct inode *inode, struct file *file)
{
+ struct gunyah_gmem_binding *b, *n;
+
gunyah_gmem_punch_hole(inode, 0, U64_MAX);

+ /* _safe iteration: gunyah_gmem_remove_binding() unlinks and frees b */
+ list_for_each_entry_safe(b, n, &inode->i_mapping->i_private_list,
+ i_entry) {
+ gunyah_gmem_remove_binding(b);
+ }
+
return 0;
}

@@ -267,3 +324,223 @@ int gunyah_guest_mem_create(struct gunyah_create_mem_args *args)
put_unused_fd(fd);
return err;
}
+
+/*
+ * Erase @b from its VM's mem_layout tree, unlink it from the list it sits on
+ * through i_entry, and free it. @b must not be used afterwards.
+ */
+void gunyah_gmem_remove_binding(struct gunyah_gmem_binding *b)
+{
+	struct gunyah_vm *vm = b->ghvm;
+
+	mtree_erase(&vm->mem_layout, b->gfn);
+	list_del(&b->i_entry);
+	kfree(b);
+}
+
+/*
+ * Return the address mask for @inode, whose i_private carries the guestmem
+ * flags. Without GHMF_ALLOW_HUGEPAGE this is PAGE_MASK; with it, the mask is
+ * HPAGE_PMD_MASK when transparent hugepages are built in and ULONG_MAX
+ * otherwise.
+ */
+static inline unsigned long gunyah_gmem_page_mask(struct inode *inode)
+{
+	const unsigned long flags = (unsigned long)inode->i_private;
+
+	if (!(flags & GHMF_ALLOW_HUGEPAGE))
+		return PAGE_MASK;
+
+#if IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)
+	return HPAGE_PMD_MASK;
+#else
+	return ULONG_MAX;
+#endif
+}
+
+/**
+ * gunyah_gmem_init_binding() - Validate map arguments and fill in a binding
+ * @ghvm: VM the binding belongs to
+ * @inode: guestmem inode that backs the binding
+ * @args: mapping arguments (byte-granular address, offset and size)
+ * @binding: binding to fill in; left untouched when validation fails
+ *
+ * Sets only the gfn, ghvm, i_off, inode, flags and nr members of @binding.
+ *
+ * Return: 0 on success, -EINVAL if @args carries unknown flags, describes an
+ * empty or wrapping range, or is not aligned to the inode's page mask.
+ */
+static int gunyah_gmem_init_binding(struct gunyah_vm *ghvm, struct inode *inode,
+				    struct gunyah_map_mem_args *args,
+				    struct gunyah_gmem_binding *binding)
+{
+	const unsigned long align_mask = ~gunyah_gmem_page_mask(inode);
+
+	if (args->flags & ~(GUNYAH_MEM_ALLOW_RWX | GUNYAH_MEM_ACCESS_MASK))
+		return -EINVAL;
+
+	/*
+	 * An empty range gives nr == 0, and callers compute the last frame as
+	 * gfn + nr - 1: that wraps, and for gfn == 0 it would describe the
+	 * entire guest address space.
+	 */
+	if (!args->size)
+		return -EINVAL;
+
+	/* Neither the guest range nor the file range may wrap around */
+	if (args->guest_addr + args->size < args->guest_addr ||
+	    args->offset + args->size < args->offset)
+		return -EINVAL;
+
+	if ((args->guest_addr | args->offset | args->size) & align_mask)
+		return -EINVAL;
+
+	binding->gfn = gunyah_gpa_to_gfn(args->guest_addr);
+	binding->ghvm = ghvm;
+	binding->i_off = args->offset >> PAGE_SHIFT;
+	binding->inode = inode;
+	binding->flags = args->flags;
+	binding->nr = args->size >> PAGE_SHIFT;
+
+	return 0;
+}
+
+/*
+ * Remove the guest range described by @args from an existing binding of
+ * @inode to @ghvm.
+ *
+ * The request has to fall inside a single binding with the same VM and
+ * flags, and its file offset must line up with that binding's guest-to-file
+ * mapping. A request covering the whole binding removes it; a request
+ * touching only one end trims the binding; a request strictly in the middle
+ * is rejected because splitting is not implemented.
+ *
+ * Return: 0 on success, -EINVAL for invalid arguments or a request that would
+ * split a binding, -ENOENT if no binding matches, or the error returned by
+ * mtree_insert_range() when re-inserting a trimmed binding.
+ */
+static int gunyah_gmem_remove_mapping(struct gunyah_vm *ghvm,
+ struct inode *inode,
+ struct gunyah_map_mem_args *args)
+{
+ /* Scratch binding: @args converted to page units, never linked anywhere */
+ struct gunyah_gmem_binding argb;
+ struct gunyah_gmem_binding *b = NULL;
+ unsigned long start_delta, end_delta;
+ int ret;
+
+ ret = gunyah_gmem_init_binding(ghvm, inode, args, &argb);
+ if (ret)
+ return ret;
+
+ filemap_invalidate_lock(inode->i_mapping);
+ list_for_each_entry(b, &inode->i_mapping->i_private_list, i_entry) {
+ /* Only a binding of the same VM with identical flags can match */
+ if (b->ghvm != argb.ghvm || b->flags != argb.flags ||
+ WARN_ON(b->inode != argb.inode))
+ continue;
+ /* Check that argb's guest range lies entirely within b */
+ if (b->gfn > argb.gfn)
+ continue;
+ if (b->gfn + b->nr < argb.gfn + argb.nr)
+ continue;
+ /* Pages between the start of b and the start of the request */
+ start_delta = argb.gfn - b->gfn;
+ /* The file offset must be displaced by the same number of pages */
+ if (argb.i_off - b->i_off != start_delta)
+ continue;
+ /*
+ * Zero when both ranges end at the same frame. Otherwise the
+ * request ends first and this unsigned subtraction wraps; the
+ * value is only ever tested against zero.
+ */
+ end_delta = argb.gfn + argb.nr - b->gfn - b->nr;
+ if (!start_delta && !end_delta) {
+ /*
+ * wipe the mapping entirely
+ * NOTE(review): unlike the trim paths below, this path does
+ * not take ghvm->mem_lock - confirm that is intended.
+ */
+ gunyah_gmem_remove_binding(b);
+ goto out;
+ } else if (start_delta && !end_delta) {
+ /* shrink the end */
+ down_write(&ghvm->mem_lock);
+ mtree_erase(&b->ghvm->mem_layout, b->gfn);
+ b->nr = start_delta;
+ /*
+ * NOTE(review): if this insert fails, b stays on the inode
+ * list but is no longer present in mem_layout.
+ */
+ ret = mtree_insert_range(&ghvm->mem_layout, b->gfn,
+ b->gfn + b->nr - 1, b,
+ GFP_KERNEL);
+ up_write(&ghvm->mem_lock);
+ goto out;
+ } else if (!start_delta && end_delta) {
+ /* Shrink the beginning */
+ down_write(&ghvm->mem_lock);
+ mtree_erase(&b->ghvm->mem_layout, b->gfn);
+ b->gfn += argb.nr;
+ b->i_off += argb.nr;
+ b->nr -= argb.nr;
+ /* NOTE(review): same failure concern as the branch above */
+ ret = mtree_insert_range(&ghvm->mem_layout, b->gfn,
+ b->gfn + b->nr - 1, b,
+ GFP_KERNEL);
+ up_write(&ghvm->mem_lock);
+ goto out;
+ } else {
+ /* TODO: split the mapping into 2 */
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ /* Walked the whole list without finding a containing binding */
+ ret = -ENOENT;
+out:
+ filemap_invalidate_unlock(inode->i_mapping);
+ return ret;
+}
+
+/**
+ * gunyah_gmem_binding_allowed_overlap() - Check whether two bindings may coexist
+ * @a: first binding
+ * @b: second binding
+ *
+ * Guest frames and guestmem pages are compared as half-open ranges
+ * [start, start + nr).
+ *
+ * Return: false if the two bindings conflict, true if both may exist.
+ */
+static bool gunyah_gmem_binding_allowed_overlap(struct gunyah_gmem_binding *a,
+						struct gunyah_gmem_binding *b)
+{
+	const bool same_vm = a->ghvm == b->ghvm;
+	const bool gfn_overlap = a->gfn < b->gfn + b->nr &&
+				 b->gfn < a->gfn + a->nr;
+	const bool off_overlap = a->i_off < b->i_off + b->nr &&
+				 b->i_off < a->i_off + a->nr;
+
+	/* Bindings can't overlap within a VM. Only one guest mem can
+	 * provide for a given guest address
+	 */
+	if (same_vm && gfn_overlap)
+		return false;
+
+	/* Bindings with no guestmem page in common never conflict */
+	if (a->inode != b->inode || !off_overlap)
+		return true;
+
+	/* Gunyah only guarantees we can share a page with one VM and
+	 * doesn't (currently) allow us to share same page with multiple VMs,
+	 * regardless whether host can also access.
+	 */
+	if (!same_vm)
+		return false;
+
+	/* The same pages at two guest addresses of one VM: only when neither
+	 * binding lends the memory
+	 */
+	return !gunyah_guest_mem_is_lend(a->ghvm, a->flags) &&
+	       !gunyah_guest_mem_is_lend(b->ghvm, b->flags);
+}
+
+/**
+ * gunyah_gmem_add_mapping() - Bind a range of a guestmem inode into a VM
+ * @ghvm: VM to receive the binding
+ * @inode: guestmem inode providing the memory
+ * @args: mapping arguments describing guest address, file offset and size
+ *
+ * Allocates a binding, checks it against every binding already linked on the
+ * inode, inserts it into the VM's mem_layout tree and links it on the inode's
+ * binding list. The binding is freed again on any failure.
+ *
+ * Return: 0 on success, -ENOMEM if the binding cannot be allocated, -EINVAL
+ * for invalid arguments, -EEXIST if the binding conflicts with an existing
+ * one, or the error returned by mtree_insert_range().
+ */
+static int gunyah_gmem_add_mapping(struct gunyah_vm *ghvm, struct inode *inode,
+				   struct gunyah_map_mem_args *args)
+{
+	struct gunyah_gmem_binding *b, *tmp;
+	int ret;
+
+	b = kzalloc(sizeof(*b), GFP_KERNEL);
+	if (!b)
+		return -ENOMEM;
+
+	ret = gunyah_gmem_init_binding(ghvm, inode, args, b);
+	if (ret)
+		goto free_binding;
+
+	filemap_invalidate_lock(inode->i_mapping);
+	list_for_each_entry(tmp, &inode->i_mapping->i_private_list, i_entry) {
+		if (!gunyah_gmem_binding_allowed_overlap(b, tmp)) {
+			ret = -EEXIST;
+			goto unlock;
+		}
+	}
+
+	ret = mtree_insert_range(&ghvm->mem_layout, b->gfn, b->gfn + b->nr - 1,
+				 b, GFP_KERNEL);
+	if (ret)
+		goto unlock;
+
+	/* From here on the tree and the list own the binding */
+	list_add(&b->i_entry, &inode->i_mapping->i_private_list);
+
+unlock:
+	filemap_invalidate_unlock(inode->i_mapping);
+free_binding:
+	/* Nothing else references the binding unless everything succeeded */
+	if (ret)
+		kfree(b);
+	return ret;
+}
+
+/**
+ * gunyah_gmem_modify_binding() - Add or remove a guestmem binding of a VM
+ * @ghvm: VM whose bindings are modified
+ * @args: mapping arguments; GUNYAH_MEM_UNMAP in @args->flags selects removal
+ *        and is cleared from @args->flags before the request is processed
+ *
+ * Return: 0 on success; -EINVAL if the file descriptor cannot be resolved, is
+ * not a Gunyah guestmem file, or the flags, access mode or alignment are
+ * invalid; otherwise the result of adding or removing the mapping.
+ */
+int gunyah_gmem_modify_binding(struct gunyah_vm *ghvm,
+			       struct gunyah_map_mem_args *args)
+{
+	const u32 known_flags = GUNYAH_MEM_ALLOW_RWX | GUNYAH_MEM_UNMAP |
+				GUNYAH_MEM_ACCESS_MASK;
+	struct file *gmem_file;
+	struct inode *inode;
+	int ret = -EINVAL;
+	bool unmap;
+
+	gmem_file = fget(args->guest_mem_fd);
+	if (!gmem_file)
+		return -EINVAL;
+
+	/* Only files created as Gunyah guestmem are acceptable */
+	if (gmem_file->f_op != &gunyah_gmem_fops)
+		goto out_put;
+
+	if (args->flags & ~known_flags)
+		goto out_put;
+
+	/* VM needs to have some permissions to the memory */
+	if (!(args->flags & GUNYAH_MEM_ALLOW_RWX))
+		goto out_put;
+
+	switch (args->flags & GUNYAH_MEM_ACCESS_MASK) {
+	case GUNYAH_MEM_DEFAULT_ACCESS:
+	case GUNYAH_MEM_FORCE_LEND:
+	case GUNYAH_MEM_FORCE_SHARE:
+		break;
+	default:
+		goto out_put;
+	}
+
+	if (!PAGE_ALIGNED(args->guest_addr | args->offset | args->size))
+		goto out_put;
+
+	inode = file_inode(gmem_file);
+	unmap = args->flags & GUNYAH_MEM_UNMAP;
+	args->flags &= ~GUNYAH_MEM_UNMAP;
+
+	ret = unmap ? gunyah_gmem_remove_mapping(ghvm, inode, args) :
+		      gunyah_gmem_add_mapping(ghvm, inode, args);
+
+out_put:
+	fput(gmem_file);
+	return ret;
+}
diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
index cd978d1ce93f..5666070453aa 100644
--- a/drivers/virt/gunyah/vm_mgr.c
+++ b/drivers/virt/gunyah/vm_mgr.c
@@ -519,6 +519,8 @@ static __must_check struct gunyah_vm *gunyah_vm_alloc(struct gunyah_rm *rm)
mutex_init(&ghvm->fn_lock);

mt_init(&ghvm->gm);
+ mt_init(&ghvm->mem_layout);
+ init_rwsem(&ghvm->mem_lock);

ghvm->addrspace_ticket.resource_type = GUNYAH_RESOURCE_TYPE_ADDR_SPACE;
ghvm->addrspace_ticket.label = GUNYAH_VM_ADDRSPACE_LABEL;
@@ -673,6 +675,14 @@ static long gunyah_vm_ioctl(struct file *filp, unsigned int cmd,
r = gunyah_vm_rm_function_instance(ghvm, &f);
break;
}
+ case GUNYAH_VM_MAP_MEM: {
+ struct gunyah_map_mem_args args;
+
+ if (copy_from_user(&args, argp, sizeof(args)))
+ return -EFAULT;
+
+ return gunyah_gmem_modify_binding(ghvm, &args);
+ }
default:
r = -ENOTTY;
break;
@@ -690,6 +700,8 @@ EXPORT_SYMBOL_GPL(gunyah_vm_get);
static void _gunyah_vm_put(struct kref *kref)
{
struct gunyah_vm *ghvm = container_of(kref, struct gunyah_vm, kref);
+ struct gunyah_gmem_binding *b;
+ unsigned long idx = 0;
int ret;

if (ghvm->vm_status == GUNYAH_RM_VM_STATUS_RUNNING)
@@ -697,6 +709,9 @@ static void _gunyah_vm_put(struct kref *kref)

gunyah_vm_remove_functions(ghvm);

+ mt_for_each(&ghvm->mem_layout, b, idx, ULONG_MAX)
+ gunyah_gmem_remove_binding(b);
+ mtree_destroy(&ghvm->mem_layout);
gunyah_vm_reclaim_memory(ghvm);

gunyah_vm_remove_resource_ticket(ghvm, &ghvm->addrspace_ticket);
diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
index d26693d10d22..8f1c3ade08dd 100644
--- a/drivers/virt/gunyah/vm_mgr.h
+++ b/drivers/virt/gunyah/vm_mgr.h
@@ -36,6 +36,8 @@ long gunyah_dev_vm_mgr_ioctl(struct gunyah_rm *rm, unsigned int cmd,
struct gunyah_vm {
u16 vmid;
struct maple_tree gm;
+ struct maple_tree mem_layout;
+ struct rw_semaphore mem_lock;
struct gunyah_vm_resource_ticket addrspace_ticket,
host_private_extent_ticket, host_shared_extent_ticket,
guest_private_extent_ticket, guest_shared_extent_ticket;
@@ -78,5 +80,9 @@ void gunyah_vm_reclaim_memory(struct gunyah_vm *ghvm);
int gunyah_vm_mmio_write(struct gunyah_vm *ghvm, u64 addr, u32 len, u64 data);

int gunyah_guest_mem_create(struct gunyah_create_mem_args *args);
+int gunyah_gmem_modify_binding(struct gunyah_vm *ghvm,
+ struct gunyah_map_mem_args *args);
+struct gunyah_gmem_binding;
+void gunyah_gmem_remove_binding(struct gunyah_gmem_binding *binding);

#endif
diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
index c5f506350364..1af4c5ae6bc3 100644
--- a/include/uapi/linux/gunyah.h
+++ b/include/uapi/linux/gunyah.h
@@ -87,6 +87,47 @@ struct gunyah_fn_desc {
#define GUNYAH_VM_ADD_FUNCTION _IOW(GUNYAH_IOCTL_TYPE, 0x4, struct gunyah_fn_desc)
#define GUNYAH_VM_REMOVE_FUNCTION _IOW(GUNYAH_IOCTL_TYPE, 0x7, struct gunyah_fn_desc)

+/**
+ * enum gunyah_map_flags - Possible flags on &struct gunyah_map_mem_args
+ * @GUNYAH_MEM_DEFAULT_ACCESS: Use default host access for the VM type
+ * @GUNYAH_MEM_FORCE_LEND: Force unmapping the memory once the guest starts to use
+ * @GUNYAH_MEM_FORCE_SHARE: Allow host to continue accessing memory when guest starts to use
+ * @GUNYAH_MEM_ALLOW_READ: Allow guest to read memory
+ * @GUNYAH_MEM_ALLOW_WRITE: Allow guest to write to the memory
+ * @GUNYAH_MEM_ALLOW_EXEC: Allow guest to execute instructions in the memory
+ * @GUNYAH_MEM_ALLOW_RWX: Combination of the read, write and execute permissions
+ * @GUNYAH_MEM_UNMAP: Remove the described binding instead of adding one
+ *
+ * The host access mode (default, lend or share) is a value held in the bits
+ * of GUNYAH_MEM_ACCESS_MASK; the remaining flags are individual bits.
+ */
+enum gunyah_map_flags {
+ GUNYAH_MEM_DEFAULT_ACCESS = 0,
+ GUNYAH_MEM_FORCE_LEND = 1,
+ GUNYAH_MEM_FORCE_SHARE = 2,
+#define GUNYAH_MEM_ACCESS_MASK 0x7
+
+ GUNYAH_MEM_ALLOW_READ = 1UL << 4,
+ GUNYAH_MEM_ALLOW_WRITE = 1UL << 5,
+ GUNYAH_MEM_ALLOW_EXEC = 1UL << 6,
+ GUNYAH_MEM_ALLOW_RWX =
+ (GUNYAH_MEM_ALLOW_READ | GUNYAH_MEM_ALLOW_WRITE | GUNYAH_MEM_ALLOW_EXEC),
+
+ GUNYAH_MEM_UNMAP = 1UL << 8,
+};
+
+/**
+ * struct gunyah_map_mem_args - Description to provide guest memory into a VM
+ * @guest_addr: Location in guest address space to place the memory
+ * @flags: See &enum gunyah_map_flags.
+ * @guest_mem_fd: File descriptor created by GUNYAH_CREATE_GUEST_MEM
+ * @offset: Offset into the guest memory file, in bytes
+ * @size: Size of the memory to provide, in bytes
+ */
+struct gunyah_map_mem_args {
+ __u64 guest_addr;
+ __u32 flags;
+ __u32 guest_mem_fd;
+ __u64 offset;
+ __u64 size;
+};
+
+#define GUNYAH_VM_MAP_MEM _IOW(GUNYAH_IOCTL_TYPE, 0x9, struct gunyah_map_mem_args)
+
/*
* ioctls for vCPU fds
*/

--
2.43.0