Re: [PATCH v3 kvm/queue 04/16] KVM: Extend the memslot to support fd-based private memory

From: Sean Christopherson
Date: Thu Dec 23 2021 - 12:35:44 EST


On Thu, Dec 23, 2021, Chao Peng wrote:
> Extend the memslot definition to provide fd-based private memory support
> by adding two new fields(fd/ofs). The memslot then can maintain memory
> for both shared and private pages in a single memslot. Shared pages are
> provided in the existing way by using userspace_addr(hva) field and
> get_user_pages() while private pages are provided through the new
> fields(fd/ofs). Since there is no 'hva' concept anymore for private
> memory we cannot call get_user_pages() to get a pfn, instead we rely on
> the newly introduced MEMFD_OPS callbacks to do the same job.
>
> This new extension is indicated by a new flag KVM_MEM_PRIVATE.
>
> Signed-off-by: Yu Zhang <yu.c.zhang@xxxxxxxxxxxxxxx>
> Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
> ---
> include/linux/kvm_host.h | 10 ++++++++++
> include/uapi/linux/kvm.h | 12 ++++++++++++
> 2 files changed, 22 insertions(+)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f8ed799e8674..2cd35560c44b 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -460,8 +460,18 @@ struct kvm_memory_slot {
> u32 flags;
> short id;
> u16 as_id;
> + u32 fd;

There should be no need to store the fd in the memslot, the fd should be unneeded
outside of __kvm_set_memory_region(), e.g.

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1caebded52c4..4e43262887a3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2029,10 +2029,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
new->npages = npages;
new->flags = mem->flags;
new->userspace_addr = mem->userspace_addr;
- new->fd = mem->fd;
- new->file = NULL;
- new->ofs = mem->ofs;
-
+ if (mem->flags & KVM_MEM_PRIVATE) {
+ new->private_file = fget(mem->private_fd);
+ new->private_offset = mem->private_offset;
+ }
r = kvm_set_memslot(kvm, old, new, change);
if (r)
kfree(new);

> + struct file *file;

Please use more descriptive names, shaving characters is not at all priority.

> + u64 ofs;

I believe this should be loff_t.

struct file *private_file;
struct loff_t private_offset;

> };
>
> +static inline bool kvm_slot_is_private(const struct kvm_memory_slot *slot)
> +{
> + if (slot && (slot->flags & KVM_MEM_PRIVATE))
> + return true;
> + return false;

return slot && (slot->flags & KVM_MEM_PRIVATE);

> +}
> +
> static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
> {
> return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 1daa45268de2..41434322fa23 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -103,6 +103,17 @@ struct kvm_userspace_memory_region {
> __u64 userspace_addr; /* start of the userspace allocated memory */
> };
>
> +struct kvm_userspace_memory_region_ext {
> + __u32 slot;
> + __u32 flags;
> + __u64 guest_phys_addr;
> + __u64 memory_size; /* bytes */
> + __u64 userspace_addr; /* hva */

Would it make sense to embed "struct kvm_userspace_memory_region"?

> + __u64 ofs; /* offset into fd */
> + __u32 fd;

Again, use descriptive names, then comments like "offset into fd" are unnecessary.

__u64 private_offset;
__u32 private_fd;

> + __u32 padding[5];
> +};
> +
> /*
> * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
> * other bits are reserved for kvm internal use which are defined in
> @@ -110,6 +121,7 @@ struct kvm_userspace_memory_region {
> */
> #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
> #define KVM_MEM_READONLY (1UL << 1)
> +#define KVM_MEM_PRIVATE (1UL << 2)
>
> /* for KVM_IRQ_LINE */
> struct kvm_irq_level {
> --
> 2.17.1
>