[PATCH 3/4] KVM: add KVM_CREATE_VM2 system ioctl

From: Radim Krčmář
Date: Thu Apr 13 2017 - 16:20:36 EST


This patch allows userspace to tell how many VCPUs it is going to use,
which can save memory when allocating the kvm->vcpus array. This will
be done with a new KVM_CREATE_VM2 IOCTL.

An alternative would be to redo kvm->vcpus as a list or protect the
array with RCU. RCU is slower and a list is not even practical as
kvm->vcpus are being used for index-based accesses.

We could have an IOCTL that is called in between KVM_CREATE_VM and first
KVM_CREATE_VCPU and sets the size of the vcpus array, but we'd be making
one useless allocation. Knowing the desired number of VCPUs from the
beginning seems best for now.

This patch also prepares generic code for architectures that will set
KVM_CONFIGURABLE_MAX_VCPUS to a non-zero value.

A disputable decision is that KVM_CREATE_VM2 actually works even if
KVM_CAP_CONFIGURABLE_MAX_VCPUS is 0, but uses that capability for its
detection.

Signed-off-by: Radim Krčmář <rkrcmar@xxxxxxxxxx>
---
Documentation/virtual/kvm/api.txt | 28 ++++++++++++++++++++++++
include/linux/kvm_host.h | 3 +++
include/uapi/linux/kvm.h | 8 +++++++
virt/kvm/kvm_main.c | 45 +++++++++++++++++++++++++++++++++------
4 files changed, 77 insertions(+), 7 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e60be91d8036..461130adbdc7 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3259,6 +3259,34 @@ Otherwise, if the MCE is a corrected error, KVM will just
store it in the corresponding bank (provided this bank is
not holding a previously reported uncorrected error).

+
+4.107 KVM_CREATE_VM2
+
+Capability: KVM_CAP_CONFIGURABLE_MAX_VCPUS
+Architectures: all
+Type: system ioctl
+Parameters: struct kvm_vm_config
+Returns: a VM fd that can be used to control the new virtual machine,
+ -E2BIG if the value of max_vcpus is not supported
+
+This is an extension of KVM_CREATE_VM that allows the user to pass more
+information through the following structure:
+
+struct kvm_vm_config {
+ __u64 type;
+ __u32 max_vcpus;
+ __u8 reserved[52];
+};
+
+type is the argument to KVM_CREATE_VM
+
+max_vcpus is the desired maximal number of VCPUs; it must not exceed the value
+returned by KVM_CAP_CONFIGURABLE_MAX_VCPUS. A value of 0 is treated as if
+userspace passed the value returned by KVM_CAP_MAX_VCPUS instead.
+
+reserved must be 0.
+
+
5. The kvm_run structure
------------------------

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6ba7bc831094..b875c0997328 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -39,6 +39,9 @@
#ifndef KVM_MAX_VCPU_ID
#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
#endif
+#ifndef KVM_CONFIGURABLE_MAX_VCPUS
+#define KVM_CONFIGURABLE_MAX_VCPUS 0U
+#endif

/*
* The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6180ea50e9ef..8349c73b3517 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -83,6 +83,12 @@ struct kvm_debug_guest {

/* *** End of deprecated interfaces *** */

+/* for KVM_CREATE_VM2 */
+struct kvm_vm_config {
+ __u64 type;
+ __u32 max_vcpus;
+ __u8 reserved[52];
+};

/* for KVM_CREATE_MEMORY_REGION */
struct kvm_memory_region {
@@ -713,6 +719,7 @@ struct kvm_ppc_resize_hpt {
*/
#define KVM_GET_API_VERSION _IO(KVMIO, 0x00)
#define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */
+#define KVM_CREATE_VM2 _IOR(KVMIO, 0x01, struct kvm_vm_config)
#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list)

#define KVM_S390_ENABLE_SIE _IO(KVMIO, 0x06)
@@ -892,6 +899,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_MIPS_64BIT 139
#define KVM_CAP_S390_GS 140
#define KVM_CAP_S390_AIS 141
+#define KVM_CAP_CONFIGURABLE_MAX_VCPUS 142

#ifdef KVM_CAP_IRQ_ROUTING

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0f1579f118b4..9ef52fa006ec 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -629,11 +629,20 @@ static inline void kvm_free_vm(struct kvm *kvm)
kfree(kvm);
}

-static struct kvm *kvm_create_vm(unsigned long type)
+static struct kvm *kvm_create_vm(struct kvm_vm_config *vm_config)
{
int r, i;
- struct kvm *kvm = kvm_alloc_vm(KVM_MAX_VCPUS);
+ struct kvm *kvm;

+ if (!KVM_CONFIGURABLE_MAX_VCPUS && vm_config->max_vcpus)
+ return ERR_PTR(-EINVAL);
+ if (vm_config->max_vcpus > KVM_CONFIGURABLE_MAX_VCPUS)
+ return ERR_PTR(-E2BIG);
+
+ if (!vm_config->max_vcpus)
+ vm_config->max_vcpus = KVM_MAX_VCPUS;
+
+ kvm = kvm_alloc_vm(vm_config->max_vcpus);
if (!kvm)
return ERR_PTR(-ENOMEM);

@@ -647,7 +656,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
refcount_set(&kvm->users_count, 1);
INIT_LIST_HEAD(&kvm->devices);

- r = kvm_arch_init_vm(kvm, type);
+ r = kvm_arch_init_vm(kvm, vm_config->type);
if (r)
goto out_err_no_disable;

@@ -2957,6 +2966,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
#endif
case KVM_CAP_MAX_VCPU_ID:
return KVM_MAX_VCPU_ID;
+ case KVM_CAP_CONFIGURABLE_MAX_VCPUS:
+ return KVM_CONFIGURABLE_MAX_VCPUS;
default:
break;
}
@@ -3182,13 +3193,13 @@ static struct file_operations kvm_vm_fops = {
.llseek = noop_llseek,
};

-static int kvm_dev_ioctl_create_vm(unsigned long type)
+static int kvm_dev_ioctl_create_vm(struct kvm_vm_config *vm_config)
{
int r;
struct kvm *kvm;
struct file *file;

- kvm = kvm_create_vm(type);
+ kvm = kvm_create_vm(vm_config);
if (IS_ERR(kvm))
return PTR_ERR(kvm);
#ifdef CONFIG_KVM_MMIO
@@ -3223,6 +3234,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
static long kvm_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
+ void __user *argp = (void __user *)arg;
long r = -EINVAL;

switch (ioctl) {
@@ -3231,9 +3243,28 @@ static long kvm_dev_ioctl(struct file *filp,
goto out;
r = KVM_API_VERSION;
break;
- case KVM_CREATE_VM:
- r = kvm_dev_ioctl_create_vm(arg);
+ case KVM_CREATE_VM: {
+ struct kvm_vm_config vm_config = {.type = arg};
+
+ r = kvm_dev_ioctl_create_vm(&vm_config);
break;
+ }
+ case KVM_CREATE_VM2: {
+ struct kvm_vm_config vm_config, check_reserved = {};
+
+ r = -EFAULT;
+ if (copy_from_user(&vm_config, argp, sizeof vm_config))
+ goto out;
+
+ r = -EINVAL;
+ check_reserved.type = vm_config.type;
+ check_reserved.max_vcpus = vm_config.max_vcpus;
+ if (memcmp(&vm_config, &check_reserved, sizeof check_reserved))
+ goto out;
+
+ r = kvm_dev_ioctl_create_vm(&vm_config);
+ break;
+ }
case KVM_CHECK_EXTENSION:
r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
break;
--
2.12.0