[PATCH v3 12/15] KVM: MMU: check last spte with unawareness of mapping level

From: Xiao Guangrong
Date: Wed Oct 23 2013 - 09:32:45 EST


The sptes on the middle levels obey these rules:
- they are always writable
- they do not point to a process's page, so SPTE_HOST_WRITEABLE has
  no chance of being set

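So we can identify a last spte using only PT_WRITABLE_MASK and
SPTE_HOST_WRITEABLE, both of which can be read from the spte itself: an spte
that is writable but does not have SPTE_HOST_WRITEABLE set is a middle-level
spte, and any other spte is a last spte. is_last_spte() therefore no longer
depends on the mapping level.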

This is important for implementing lockless write-protection, since only the
spte is available at that time.

Signed-off-by: Xiao Guangrong <xiaoguangrong@xxxxxxxxxxxxxxxxxx>
---
arch/x86/kvm/mmu.c | 25 ++++++++++++-------------
arch/x86/kvm/mmu_audit.c | 6 +++---
arch/x86/kvm/paging_tmpl.h | 6 ++----
3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5b42858..8b96d96 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -337,13 +337,13 @@ static int is_rmap_spte(u64 pte)
return is_shadow_present_pte(pte);
}

-static int is_last_spte(u64 pte, int level)
+static int is_last_spte(u64 pte)
{
- if (level == PT_PAGE_TABLE_LEVEL)
- return 1;
- if (is_large_pte(pte))
- return 1;
- return 0;
+ /*
+ * All the sptes on the middle level are writable but
+ * SPTE_HOST_WRITEABLE is not set.
+ */
+ return !(is_writable_pte(pte) && !(pte & SPTE_HOST_WRITEABLE));
}

static pfn_t spte_to_pfn(u64 pte)
@@ -2203,7 +2203,7 @@ static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
static void __shadow_walk_next(struct kvm_shadow_walk_iterator *iterator,
u64 spte)
{
- if (is_last_spte(spte, iterator->level)) {
+ if (is_last_spte(spte)) {
iterator->level = 0;
return;
}
@@ -2255,15 +2255,14 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}
}

-static bool mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
- u64 *spte)
+static bool mmu_page_zap_pte(struct kvm *kvm, u64 *spte)
{
u64 pte;
struct kvm_mmu_page *child;

pte = *spte;
if (is_shadow_present_pte(pte)) {
- if (is_last_spte(pte, sp->role.level)) {
+ if (is_last_spte(pte)) {
drop_spte(kvm, spte);
if (is_large_pte(pte))
--kvm->stat.lpages;
@@ -2286,7 +2285,7 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
unsigned i;

for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
- mmu_page_zap_pte(kvm, sp, sp->spt + i);
+ mmu_page_zap_pte(kvm, sp->spt + i);
}

static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte)
@@ -3068,7 +3067,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
}

sp = page_header(__pa(iterator.sptep));
- if (!is_last_spte(spte, sp->role.level))
+ if (!is_last_spte(spte))
goto exit;

/*
@@ -4316,7 +4315,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
local_flush = true;
while (npte--) {
entry = *spte;
- mmu_page_zap_pte(vcpu->kvm, sp, spte);
+ mmu_page_zap_pte(vcpu->kvm, spte);
if (gentry &&
!((sp->role.word ^ vcpu->arch.mmu.base_role.word)
& mask.word) && rmap_can_add(vcpu))
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index daff69e..d54e2ad 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -45,7 +45,7 @@ static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
fn(vcpu, ent + i, level);

if (is_shadow_present_pte(ent[i]) &&
- !is_last_spte(ent[i], level)) {
+ !is_last_spte(ent[i])) {
struct kvm_mmu_page *child;

child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
@@ -110,7 +110,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
}
}

- if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level))
+ if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep))
return;

gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
@@ -158,7 +158,7 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)

static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
- if (is_shadow_present_pte(*sptep) && is_last_spte(*sptep, level))
+ if (is_shadow_present_pte(*sptep) && is_last_spte(*sptep))
inspect_spte_has_rmap(vcpu->kvm, sptep);
}

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ad75d77..33f0216 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -809,7 +809,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
- int level;
u64 *sptep;

vcpu_clear_mmio_info(vcpu, gva);
@@ -822,11 +821,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)

spin_lock(&vcpu->kvm->mmu_lock);
for_each_shadow_entry(vcpu, gva, iterator) {
- level = iterator.level;
sptep = iterator.sptep;

sp = page_header(__pa(sptep));
- if (is_last_spte(*sptep, level)) {
+ if (is_last_spte(*sptep)) {
pt_element_t gpte;
gpa_t pte_gpa;

@@ -836,7 +834,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
pte_gpa = FNAME(get_level1_sp_gpa)(sp);
pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);

- if (mmu_page_zap_pte(vcpu->kvm, sp, sptep))
+ if (mmu_page_zap_pte(vcpu->kvm, sptep))
kvm_flush_remote_tlbs(vcpu->kvm);

if (!rmap_can_add(vcpu))
--
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/