[PATCH RFC 4/4] VMX: Expose the LA57 feature to VM

From: Liang Li
Date: Thu Dec 29 2016 - 04:33:25 EST


This patch exposes 5 level page table feature to the VM,
at the same time, the canonical virtual address checking is
extended to support both 48-bits and 57-bits address width,
it's the prerequisite to support 5 level paging guest.

Signed-off-by: Liang Li <liang.z.li@xxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>
Cc: Xiao Guangrong <guangrong.xiao@xxxxxxxxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: "Radim Krčmář" <rkrcmar@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 12 ++++++------
arch/x86/kvm/cpuid.c | 15 +++++++++------
arch/x86/kvm/emulate.c | 15 ++++++++++-----
arch/x86/kvm/kvm_cache_regs.h | 7 ++++++-
arch/x86/kvm/vmx.c | 4 ++--
arch/x86/kvm/x86.c | 8 ++++++--
6 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e505dac..57850b3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -86,8 +86,8 @@
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP \
- | X86_CR4_PKE))
+ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
+ | X86_CR4_SMAP | X86_CR4_PKE))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)

@@ -1269,15 +1269,15 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
}

-static inline u64 get_canonical(u64 la)
+static inline u64 get_canonical(u64 la, u8 vaddr_bits)
{
- return ((int64_t)la << 16) >> 16;
+ return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits);
}

-static inline bool is_noncanonical_address(u64 la)
+static inline bool is_noncanonical_address(u64 la, u8 vaddr_bits)
{
#ifdef CONFIG_X86_64
- return get_canonical(la) != la;
+ return get_canonical(la, vaddr_bits) != la;
#else
return false;
#endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e85f6bd..69e8c1a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -126,13 +126,16 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
kvm_x86_ops->fpu_activate(vcpu);

/*
- * The existing code assumes virtual address is 48-bit in the canonical
- * address checks; exit if it is ever changed.
+ * The existing code assumes virtual address is 48-bit or 57-bit in the
+ * canonical address checks; exit if it is ever changed.
*/
best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
- if (best && ((best->eax & 0xff00) >> 8) != 48 &&
- ((best->eax & 0xff00) >> 8) != 0)
- return -EINVAL;
+ if (best) {
+ int vaddr_bits = (best->eax & 0xff00) >> 8;
+
+ if (vaddr_bits != 48 && vaddr_bits != 57 && vaddr_bits != 0)
+ return -EINVAL;
+ }

/* Update physical-address width */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
@@ -383,7 +386,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,

/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/;
+ F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/;

/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 56628a4..da01dd7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -676,6 +676,11 @@ static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
}
}

+static __always_inline u8 virt_addr_bits(struct x86_emulate_ctxt *ctxt)
+{
+ return (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_LA57) ? 57 : 48;
+}
+
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
struct segmented_address addr,
unsigned *max_size, unsigned size,
@@ -693,7 +698,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
switch (mode) {
case X86EMUL_MODE_PROT64:
*linear = la;
- if (is_noncanonical_address(la))
+ if (is_noncanonical_address(la, virt_addr_bits(ctxt)))
goto bad;

*max_size = min_t(u64, ~0u, (1ull << 48) - la);
@@ -1721,7 +1726,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
if (ret != X86EMUL_CONTINUE)
return ret;
if (is_noncanonical_address(get_desc_base(&seg_desc) |
- ((u64)base3 << 32)))
+ ((u64)base3 << 32), virt_addr_bits(ctxt)))
return emulate_gp(ctxt, 0);
}
load:
@@ -2796,8 +2801,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ss_sel = cs_sel + 8;
cs.d = 0;
cs.l = 1;
- if (is_noncanonical_address(rcx) ||
- is_noncanonical_address(rdx))
+ if (is_noncanonical_address(rcx, virt_addr_bits(ctxt)) ||
+ is_noncanonical_address(rdx, virt_addr_bits(ctxt)))
return emulate_gp(ctxt, 0);
break;
}
@@ -3712,7 +3717,7 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
if (rc != X86EMUL_CONTINUE)
return rc;
if (ctxt->mode == X86EMUL_MODE_PROT64 &&
- is_noncanonical_address(desc_ptr.address))
+ is_noncanonical_address(desc_ptr.address, virt_addr_bits(ctxt)))
return emulate_gp(ctxt, 0);
if (lgdt)
ctxt->ops->set_gdt(ctxt, &desc_ptr);
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 762cdf2..5daf75f 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -4,7 +4,7 @@
#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
#define KVM_POSSIBLE_CR4_GUEST_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
- | X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
+ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE)

static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
@@ -78,6 +78,11 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
return kvm_read_cr4_bits(vcpu, ~0UL);
}

+static inline u8 get_virt_addr_bits(struct kvm_vcpu *vcpu)
+{
+ return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
+}
+
static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
{
return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bfc9f0a..183a53e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -123,7 +123,7 @@
(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
- | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
+ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)

#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -7017,7 +7017,7 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
* non-canonical form. This is the only check on the memory
* destination for long mode!
*/
- exn = is_noncanonical_address(*ret);
+ exn = is_noncanonical_address(*ret, get_virt_addr_bits(vcpu));
} else if (is_protmode(vcpu)) {
/* Protected mode: apply checks for segment validity in the
* following order:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 51ccfe0..b935658 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -762,6 +762,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!guest_cpuid_has_pku(vcpu) && (cr4 & X86_CR4_PKE))
return 1;

+ if (!guest_cpuid_has_la57(vcpu) && (cr4 & X86_CR4_LA57))
+ return 1;
+
if (is_long_mode(vcpu)) {
if (!(cr4 & X86_CR4_PAE))
return 1;
@@ -1074,7 +1077,8 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_KERNEL_GS_BASE:
case MSR_CSTAR:
case MSR_LSTAR:
- if (is_noncanonical_address(msr->data))
+ if (is_noncanonical_address(msr->data,
+ get_virt_addr_bits(vcpu)))
return 1;
break;
case MSR_IA32_SYSENTER_EIP:
@@ -1091,7 +1095,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
* value, and that something deterministic happens if the guest
* invokes 64-bit SYSENTER.
*/
- msr->data = get_canonical(msr->data);
+ msr->data = get_canonical(msr->data, get_virt_addr_bits(vcpu));
}
return kvm_x86_ops->set_msr(vcpu, msr);
}
--
1.9.1