[RFC PATCH 1/2] KVM: x86: Add a new system attribute for dynamic XSTATE component

From: Chang S. Bae
Date: Tue Aug 23 2022 - 19:23:58 EST


== Background ==

A set of architecture-specific arch_prctl() options provides control over
dynamic XSTATE components in vCPUs. Userspace VMMs may interact with the
host using ARCH_GET_XCOMP_GUEST_PERM and ARCH_REQ_XCOMP_GUEST_PERM.

However, these options are separate from the KVM API. KVM may select among
the features that the host supports and advertise them through the
KVM_X86_XCOMP_GUEST_SUPP attribute.

== Problem ==

QEMU [1] queries the features through the KVM API instead of using the x86
arch_prctl() option, but it still needs to use arch_prctl() to request the
permission. This step is fragile because the request is not guaranteed to
comply with the KVM policy.

== Solution ==

Introduce a new attribute: KVM_X86_XCOMP_GUEST_PERM, and make it available
via the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR APIs.

The implementation reuses the permission-request path established for
fpu_xstate_prctl() to handle guest permissions. That path is exported
through a new function, xstate_req_guest_perm(), for KVM to call.

[1] https://gitlab.com/qemu-project/qemu/-/commit/19db68ca68a7

Signed-off-by: Chang S. Bae <chang.seok.bae@xxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Yang Zhong <yang.zhong@xxxxxxxxx>
Cc: kvm@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
---
arch/x86/include/asm/fpu/api.h | 1 +
arch/x86/include/uapi/asm/kvm.h | 1 +
arch/x86/kernel/fpu/xstate.c | 6 ++++++
arch/x86/kvm/x86.c | 31 +++++++++++++++++++++++++++++++
4 files changed, 39 insertions(+)

diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 503a577814b2..e4670d56b695 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -133,6 +133,7 @@ static inline void fpstate_free(struct fpu *fpu) { }
extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature);

extern u64 xstate_get_guest_group_perm(void);
+extern int xstate_req_guest_perm(unsigned long idx);

/* KVM specific functions */
extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 46de10a809ec..6ab9a2b38061 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -461,6 +461,7 @@ struct kvm_sync_regs {

/* attributes for system fd (group 0) */
#define KVM_X86_XCOMP_GUEST_SUPP 0
+#define KVM_X86_XCOMP_GUEST_PERM 1

struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c8340156bfd2..ac365cb96304 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1687,6 +1687,12 @@ u64 xstate_get_guest_group_perm(void)
}
EXPORT_SYMBOL_GPL(xstate_get_guest_group_perm);

+/*
+ * xstate_req_guest_perm - request permission for an XSTATE component
+ * @idx: XSTATE component number, forwarded unchanged
+ *
+ * Exported wrapper so that code outside this file (KVM, per the patch
+ * description) can reach xstate_request_perm(). The second argument is
+ * always true, which presumably selects the guest permission set --
+ * confirm against xstate_request_perm().
+ *
+ * Return: whatever xstate_request_perm() returns.
+ */
+int xstate_req_guest_perm(unsigned long idx)
+{
+	const bool guest = true;
+
+	return xstate_request_perm(idx, guest);
+}
+EXPORT_SYMBOL_GPL(xstate_req_guest_perm);
+
/**
* fpu_xstate_prctl - xstate permission operations
* @tsk: Redundant pointer to current
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 205ebdc2b11b..f4a1e94117d6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4514,12 +4514,34 @@ static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr)
if (put_user(kvm_caps.supported_xcr0, uaddr))
return -EFAULT;
return 0;
+ case KVM_X86_XCOMP_GUEST_PERM: {
+ u64 permitted = xstate_get_guest_group_perm() & kvm_caps.supported_xcr0;
+
+ return put_user(permitted, uaddr);
+ }
default:
return -ENXIO;
break;
}
}

+/*
+ * Handle a set-attribute request described by @attr.
+ *
+ * Only group 0 is accepted. For KVM_X86_XCOMP_GUEST_PERM the value
+ * obtained from kvm_get_attr_addr() is not dereferenced as a pointer;
+ * it is used directly as an XSTATE component number.
+ *
+ * Return:
+ *   -ENXIO       if the group is non-zero or the attribute is unknown,
+ *   -EOPNOTSUPP  if the component number is outside the 64-bit mask or
+ *                its bit is not set in kvm_caps.supported_xcr0,
+ *   otherwise the result of xstate_req_guest_perm().
+ */
+static int kvm_x86_dev_set_attr(struct kvm_device_attr *attr)
+{
+	unsigned long idx = (unsigned long) kvm_get_attr_addr(attr);
+
+	if (attr->group)
+		return -ENXIO;
+
+	switch (attr->attr) {
+	case KVM_X86_XCOMP_GUEST_PERM:
+		/*
+		 * idx is caller-supplied. Shifting a 64-bit value by 64 or
+		 * more is undefined behaviour, so reject out-of-range values
+		 * before building the mask; such a bit can never be part of
+		 * the supported set anyway.
+		 */
+		if (idx >= 64)
+			return -EOPNOTSUPP;
+		if (!((1ULL << idx) & kvm_caps.supported_xcr0))
+			return -EOPNOTSUPP;
+		return xstate_req_guest_perm(idx);
+	default:
+		return -ENXIO;
+	}
+}
+
static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr)
{
if (attr->group)
@@ -4629,6 +4651,15 @@ long kvm_arch_dev_ioctl(struct file *filp,
r = kvm_x86_dev_get_attr(&attr);
break;
}
+ case KVM_SET_DEVICE_ATTR: {
+ struct kvm_device_attr attr;
+
+ r = -EFAULT;
+ if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+ break;
+ r = kvm_x86_dev_set_attr(&attr);
+ break;
+ }
case KVM_HAS_DEVICE_ATTR: {
struct kvm_device_attr attr;
r = -EFAULT;
--
2.17.1