[PATCH v2 2/5] mmu: don't set the present bit unconditionally

From: Bandan Das
Date: Tue Jul 12 2016 - 18:19:55 EST


To support execute-only mappings on behalf of L1 hypervisors, we
teach set_spte() to honor L1's valid XWR bits.  This is done only
if the host supports EPT execute-only mappings.  Reuse
ACC_USER_MASK to signify whether the L1 hypervisor has the R bit
set.  Add a new variable, "shadow_present_mask", which is set to
PT_PRESENT_MASK whenever execute-only is not available (the
non-EPT cases and EPT without execute-only support), preserving
the existing behavior for those cases.
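
As a rough illustration of the intent (the helper and the EPT_R/W/X
constants below are stand-ins, not the real KVM symbols):

  #define EPT_R (1ull << 0)	/* readable; doubles as "present" */
  #define EPT_W (1ull << 1)
  #define EPT_X (1ull << 2)

  /*
   * present_mask plays the role of shadow_present_mask: EPT_R when
   * the CPU lacks execute-only support (every mapping forced
   * readable, today's behavior), 0 when it has it (L1's R bit decides).
   */
  static u64 spte_bits(u64 present_mask, bool r, bool w, bool x)
  {
  	u64 spte = present_mask;

  	if (r)
  		spte |= EPT_R;
  	if (w)
  		spte |= EPT_W;
  	if (x)
  		spte |= EPT_X;
  	return spte;
  }

With execute-only support, spte_bits(0, false, false, true) yields EPT_X
alone, i.e. an execute-only mapping; without it, spte_bits(EPT_R, false,
false, true) keeps today's R+X result.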

Signed-off-by: Bandan Das <bsd@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu.c              | 15 ++++++++++++---
 arch/x86/kvm/paging_tmpl.h      |  8 +++++++-
 arch/x86/kvm/vmx.c              |  7 +++++--
 arch/x86/kvm/x86.c              |  4 ++--
 5 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 69e62862..c0acc66 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1026,7 +1026,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask);
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask);
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 87b62dc..ae80aa4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -175,6 +175,7 @@ static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mmio_mask;
+static u64 __read_mostly shadow_present_mask;
 
 static void mmu_spte_set(u64 *sptep, u64 spte);
 static void mmu_free_roots(struct kvm_vcpu *vcpu);
@@ -282,13 +283,14 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
 }
 
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask)
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask)
 {
 	shadow_user_mask = user_mask;
 	shadow_accessed_mask = accessed_mask;
 	shadow_dirty_mask = dirty_mask;
 	shadow_nx_mask = nx_mask;
 	shadow_x_mask = x_mask;
+	shadow_present_mask = p_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);

@@ -2515,13 +2517,20 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    gfn_t gfn, kvm_pfn_t pfn, bool speculative,
 		    bool can_unsync, bool host_writable)
 {
-	u64 spte;
+	u64 spte = 0;
 	int ret = 0;
 
 	if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
 		return 0;
 
-	spte = PT_PRESENT_MASK;
+	/*
+	 * In the non-EPT case, execonly is not valid and so
+	 * the following line is equivalent to spte |= PT_PRESENT_MASK.
+	 * For the EPT case, shadow_present_mask is 0 if hardware
+	 * supports execute-only; readability then follows L1's R bit,
+	 * carried in ACC_USER_MASK.  See FNAME(gpte_access) in paging_tmpl.h.
+	 */
+	spte |= shadow_present_mask;
 	if (!speculative)
 		spte |= shadow_accessed_mask;
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bc019f7..f2741db 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -181,13 +181,19 @@ no_present:
 	return true;
 }
 
+/*
+ * For PTTYPE_EPT, a page table can be executable but not readable
+ * on supported processors. Therefore, set_spte does not automatically
+ * set bit 0 if execute-only is supported. Here, we repurpose ACC_USER_MASK
+ * to signify readability, since it isn't used in the EPT case.
+ */
 static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
 {
 	unsigned access;
 #if PTTYPE == PTTYPE_EPT
 	access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
 		((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
-		ACC_USER_MASK;
+		((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
 #else
 	BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
 	BUILD_BUG_ON(ACC_EXEC_MASK != 1);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64a79f2..f73b5dc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6366,10 +6366,13 @@ static __init int hardware_setup(void)
 	vmx_disable_intercept_msr_write_x2apic(0x83f);
 
 	if (enable_ept) {
-		kvm_mmu_set_mask_ptes(0ull,
+		kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
 			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
 			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-			0ull, VMX_EPT_EXECUTABLE_MASK);
+			0ull, VMX_EPT_EXECUTABLE_MASK,
+			cpu_has_vmx_ept_execute_only() ?
+				0ull : PT_PRESENT_MASK);
+		BUILD_BUG_ON(PT_PRESENT_MASK != VMX_EPT_READABLE_MASK);
 		ept_set_mmio_spte_mask();
 		kvm_enable_tdp();
 	} else
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7da5dd2..b15f214 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5867,8 +5867,8 @@ int kvm_arch_init(void *opaque)
 	kvm_x86_ops = ops;
 
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
-			PT_DIRTY_MASK, PT64_NX_MASK, 0);
-
+			PT_DIRTY_MASK, PT64_NX_MASK, 0,
+			PT_PRESENT_MASK);
 	kvm_timer_init();
 
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);
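
A compressed model of the EPT flow after this patch may help review.
It assumes the usual KVM definitions (ACC_* from mmu.h, VMX_EPT_* and
PT_PRESENT_MASK from the existing headers); the helpers are made up,
and the real set_spte() does much more (write protection, dirty
tracking), but the permission flow is the point:

  /* L1's EPT gpte bits -> ACC_* bits, as in FNAME(gpte_access) above */
  static unsigned ept_gpte_access(u64 gpte)
  {
  	return ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
  	       ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
  	       ((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
  }

  /* ACC_* bits -> shadow EPT permissions, much simplified from set_spte() */
  static u64 ept_spte_perms(unsigned access, bool has_exec_only)
  {
  	u64 spte = has_exec_only ? 0 : PT_PRESENT_MASK;	/* shadow_present_mask */

  	if (access & ACC_USER_MASK)		/* repurposed: L1's R bit */
  		spte |= VMX_EPT_READABLE_MASK;	/* shadow_user_mask */
  	if (access & ACC_WRITE_MASK)
  		spte |= VMX_EPT_WRITABLE_MASK;
  	if (access & ACC_EXEC_MASK)
  		spte |= VMX_EPT_EXECUTABLE_MASK;	/* shadow_x_mask */
  	return spte;
  }

An execute-only gpte from L1 (X set, R/W clear) now maps to an
execute-only shadow entry when the CPU supports it, instead of always
gaining the readable bit.
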
--
2.5.5