[PATCH] KVM: arm64: Only save S1PIE registers when dirty

From: Mark Brown
Date: Fri Mar 01 2024 - 13:24:52 EST


Currently we save the S1PIE registers every time we exit the guest but
the expected usage pattern for these registers is that they will be
written to very infrequently, likely once during initialisation and then
never updated again. This means that most of our saves of these
registers are most likely redundant. Let's avoid these redundant saves by
enabling fine grained write traps for the EL0 and EL1 PIE registers when
switching to the guest and only saving if a write happened.

We track if the registers have been written by storing a mask of bits
for HFGWTR_EL2; we may be able to use the same approach for other
registers with similar access patterns. We assume that it is likely
that both registers will be written in quick succession and mark both
PIR_EL1 and PIRE0_EL1 as dirty if either is written in order to minimise
overhead.

This will have a negative performance impact if guests do start updating
these registers frequently but since the PIE indexes have a wide impact
on the page tables it seems likely that this will not be the case.

We do not need to check for FGT support since it is mandatory for
systems with PIE.

Signed-off-by: Mark Brown <broonie@xxxxxxxxxx>
---
I don't have a good sense of whether this is a good idea, or whether
this is a desirable implementation of the concept - the patch is based on some
concerns about the cost of the system register context switching. We
should be able to do something similar for some of the other registers.
---
arch/arm64/include/asm/kvm_host.h | 37 ++++++++++++++++++++++++++++++
arch/arm64/kvm/hyp/include/hyp/switch.h | 36 +++++++++++++++++++++++++++++
arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 6 ++++-
3 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 21c57b812569..340567e9b206 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -475,6 +475,8 @@ enum vcpu_sysreg {
};

struct kvm_cpu_context {
+ u64 reg_dirty; /* Mask of HFGWTR_EL2 bits */
+
struct user_pt_regs regs; /* sp = sp_el0 */

u64 spsr_abt;
@@ -492,6 +494,32 @@ struct kvm_cpu_context {
u64 *vncr_array;
};

+static inline bool __ctxt_reg_dirty(const struct kvm_cpu_context *ctxt,
+ u64 trap)
+{
+ return ctxt->reg_dirty & trap;
+}
+
+static inline bool __ctxt_reg_set_dirty(struct kvm_cpu_context *ctxt, u64 trap)
+{
+ return ctxt->reg_dirty |= trap;
+}
+
+static inline bool __ctxt_reg_set_clean(struct kvm_cpu_context *ctxt, u64 trap)
+{
+ return ctxt->reg_dirty &= ~trap;
+}
+
+#define ctxt_reg_dirty(ctxt, trap) \
+ __ctxt_reg_dirty(ctxt, HFGxTR_EL2_##trap)
+
+
+#define ctxt_reg_set_dirty(ctxt, trap) \
+ __ctxt_reg_set_dirty(ctxt, HFGxTR_EL2_##trap)
+
+#define ctxt_reg_set_clean(ctxt, trap) \
+ __ctxt_reg_set_clean(ctxt, HFGxTR_EL2_##trap)
+
struct kvm_host_data {
struct kvm_cpu_context host_ctxt;
};
@@ -1118,6 +1146,15 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{
/* The host's MPIDR is immutable, so let's set it up at boot time */
ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
+
+ /*
+ * Save the S1PIE setup on first use; we assume the host will
+ * never update these registers again.
+ */
+ if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+ ctxt_reg_set_dirty(cpu_ctxt, nPIR_EL1);
+ ctxt_reg_set_dirty(cpu_ctxt, nPIRE0_EL1);
+ }
}

static inline bool kvm_system_needs_idmapped_vectors(void)
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index a038320cdb08..2cccc7f2b492 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -152,6 +152,12 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
w_set |= HFGxTR_EL2_TCR_EL1_MASK;

+ /*
+ * Trap writes to infrequently updated registers.
+ */
+ if (cpus_have_final_cap(ARM64_HAS_S1PIE))
+ w_clr |= HFGxTR_EL2_nPIR_EL1 | HFGxTR_EL2_nPIRE0_EL1;
+
if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
@@ -570,6 +576,33 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu)
return true;
}

+static inline bool kvm_hyp_handle_read_mostly_sysreg(struct kvm_vcpu *vcpu)
+{
+ u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+ u64 fgt_w_set;
+
+ switch (sysreg) {
+ case SYS_PIR_EL1:
+ case SYS_PIRE0_EL1:
+ /*
+ * PIR registers are always restored, we just need to
+ * disable write traps. We expect EL0 and EL1
+ * controls to be updated close together so enable
+ * both.
+ */
+ if (!cpus_have_final_cap(ARM64_HAS_S1PIE))
+ return false;
+ fgt_w_set = HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1;
+ break;
+ default:
+ return false;
+ }
+
+ sysreg_clear_set_s(SYS_HFGWTR_EL2, 0, fgt_w_set);
+ vcpu->arch.ctxt.reg_dirty |= fgt_w_set;
+ return true;
+}
+
static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
@@ -590,6 +623,9 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
if (kvm_hyp_handle_cntpct(vcpu))
return true;

+ if (kvm_hyp_handle_read_mostly_sysreg(vcpu))
+ return true;
+
return false;
}

diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index bb6b571ec627..f60d27288b2a 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -55,9 +55,13 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR);
ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR);
ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL);
- if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+ if (ctxt_reg_dirty(ctxt, nPIR_EL1)) {
ctxt_sys_reg(ctxt, PIR_EL1) = read_sysreg_el1(SYS_PIR);
+ ctxt_reg_set_clean(ctxt, nPIR_EL1);
+ }
+ if (ctxt_reg_dirty(ctxt, nPIRE0_EL1)) {
ctxt_sys_reg(ctxt, PIRE0_EL1) = read_sysreg_el1(SYS_PIRE0);
+ ctxt_reg_set_clean(ctxt, nPIRE0_EL1);
}
ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par();
ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);

---
base-commit: 54be6c6c5ae8e0d93a6c4641cb7528eb0b6ba478
change-id: 20240227-kvm-arm64-defer-regs-d29ae460d0b3

Best regards,
--
Mark Brown <broonie@xxxxxxxxxx>