[Patch v2 3/5] KVM: x86/mmu: Optimize SPTE change for aging gfn range

From: Vipin Sharma
Date: Fri Feb 03 2023 - 14:28:43 EST


No need to check all of the conditions in __handle_changed_spte(). Aging
a gfn range implies resetting access bit or marking spte for access
tracking.

Use atomic operation to only reset those bits. This avoids checking many
conditions in __handle_changed_spte() API. Also, clean up code by
removing dead code and API parameters.

Signed-off-by: Vipin Sharma <vipinsh@xxxxxxxxxx>
---
arch/x86/kvm/mmu/tdp_mmu.c | 68 ++++++++++++++------------------------
1 file changed, 25 insertions(+), 43 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 83f15052aa6c..18630a06fa1f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -697,7 +697,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,


/*
- * __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
+ * _tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
* @kvm: KVM instance
* @as_id: Address space ID, i.e. regular vs. SMM
* @sptep: Pointer to the SPTE
@@ -705,18 +705,12 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
* @new_spte: The new value that will be set for the SPTE
* @gfn: The base GFN that was (or will be) mapped by the SPTE
* @level: The level _containing_ the SPTE (its parent PT's level)
- * @record_acc_track: Notify the MM subsystem of changes to the accessed state
- * of the page. Should be set unless handling an MMU
- * notifier for access tracking. Leaving record_acc_track
- * unset in that case prevents page accesses from being
- * double counted.
*
* Returns the old SPTE value, which _may_ be different than @old_spte if the
* SPTE had voldatile bits.
*/
-static u64 __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
- u64 old_spte, u64 new_spte, gfn_t gfn, int level,
- bool record_acc_track)
+static u64 _tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
+ u64 old_spte, u64 new_spte, gfn_t gfn, int level)
{
lockdep_assert_held_write(&kvm->mmu_lock);

@@ -732,37 +726,20 @@ static u64 __tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);

__handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false);
-
- if (record_acc_track)
- handle_changed_spte_acc_track(old_spte, new_spte, level);
-
+ handle_changed_spte_acc_track(old_spte, new_spte, level);
handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte, new_spte,
level);
return old_spte;
}

-static inline void _tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
- u64 new_spte, bool record_acc_track)
-{
- WARN_ON_ONCE(iter->yielded);
-
- iter->old_spte = __tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep,
- iter->old_spte, new_spte,
- iter->gfn, iter->level,
- record_acc_track);
-}
-
static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
u64 new_spte)
{
- _tdp_mmu_set_spte(kvm, iter, new_spte, true);
-}
+ WARN_ON_ONCE(iter->yielded);

-static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
- struct tdp_iter *iter,
- u64 new_spte)
-{
- _tdp_mmu_set_spte(kvm, iter, new_spte, false);
+ iter->old_spte = _tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep,
+ iter->old_spte, new_spte,
+ iter->gfn, iter->level);
}

#define tdp_root_for_each_pte(_iter, _root, _start, _end) \
@@ -911,8 +888,8 @@ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte)))
return false;

- __tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0,
- sp->gfn, sp->role.level + 1, true);
+ _tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0,
+ sp->gfn, sp->role.level + 1);

return true;
}
@@ -1251,32 +1228,37 @@ static __always_inline bool kvm_tdp_mmu_handle_gfn(struct kvm *kvm,
/*
* Mark the SPTEs range of GFNs [start, end) unaccessed and return non-zero
* if any of the GFNs in the range have been accessed.
+ *
+ * No need to mark corresponding PFN as accessed as this call is coming from
+ * MMU notifier for that page via HVA.
*/
static bool age_gfn_range(struct kvm *kvm, struct tdp_iter *iter,
struct kvm_gfn_range *range)
{
- u64 new_spte = 0;
+ u64 new_spte;

/* If we have a non-accessed entry we don't need to change the pte. */
if (!is_accessed_spte(iter->old_spte))
return false;

- new_spte = iter->old_spte;
-
- if (spte_ad_enabled(new_spte)) {
- new_spte &= ~shadow_accessed_mask;
+ if (spte_ad_enabled(iter->old_spte)) {
+ iter->old_spte = kvm_tdp_mmu_clear_spte_bit(iter,
+ shadow_accessed_mask);
+ new_spte = iter->old_spte & ~shadow_accessed_mask;
} else {
+ new_spte = mark_spte_for_access_track(iter->old_spte);
+ iter->old_spte = kvm_tdp_mmu_write_spte(iter->sptep, iter->old_spte,
+ new_spte, iter->level);
/*
* Capture the dirty status of the page, so that it doesn't get
* lost when the SPTE is marked for access tracking.
*/
- if (is_writable_pte(new_spte))
- kvm_set_pfn_dirty(spte_to_pfn(new_spte));
-
- new_spte = mark_spte_for_access_track(new_spte);
+ if (is_writable_pte(iter->old_spte))
+ kvm_set_pfn_dirty(spte_to_pfn(iter->old_spte));
}

- tdp_mmu_set_spte_no_acc_track(kvm, iter, new_spte);
+ trace_kvm_tdp_mmu_spte_changed(iter->as_id, iter->gfn, iter->level,
+ iter->old_spte, new_spte);

return true;
}
--
2.39.1.519.gcb327c4b5f-goog