Re: [PATCH 2/2] KVM: arm64: timers: Adjust CVAL of a ptimer across guest entry and exits

From: Marc Zyngier
Date: Tue Aug 22 2023 - 08:16:43 EST


On Thu, 17 Aug 2023 10:27:55 +0100,
Ganapatrao Kulkarni <gankulkarni@xxxxxxxxxxxxxxxxxxxxxx> wrote:
>
>
> Hi Marc,
>
> On 17-08-2023 01:57 pm, Marc Zyngier wrote:
> > [Fixing Christoffer's email address]
>
> Thanks.
> >
> > On Thu, 17 Aug 2023 07:03:14 +0100,
> > Ganapatrao Kulkarni <gankulkarni@xxxxxxxxxxxxxxxxxxxxxx> wrote:
> >>
> >> As per FEAT_ECV, when HCR_EL2.{E2H, TGE} == {1, 1}, Enhanced Counter
> >> Virtualization functionality is disabled and CNTPOFF_EL2 value is treated
> >> as zero. On VHE host, E2H and TGE are set, hence it is required
> >> to adjust CVAL by incrementing it by CNTPOFF_EL2 after guest
> >> exit to avoid false physical timer interrupts and also
> >> decrement/restore CVAL before the guest entry.
> >
> > No, this is wrong. Neither E2H nor TGE have any impact on writing to
> > CNTPOFF_EL2, nor does it have an impact on CNTP_CVAL_EL0. Just read
> > the pseudocode to convince yourself.
> >
> > CNTPOFF_EL2 is applied at exactly two points: when SW is reading
> > CNTPCT_EL0 from EL1 while {E2H,TGE}=={1, 0} and when the HW is
> > comparing CNTPCT_EL0 with the CNTP_CVAL_EL0. In both cases the offset
> > is subtracted from the counter. And that's the point where the running
> > EL matters. Which means that CNTPOFF_EL2 behaves exactly like
> > CNTVOFF_EL2. No ifs, no buts.
>
> As per ARM ARM (ARM DDI 0487J.a page D11-5989)
> "When FEAT_ECV is implemented, the CNTPOFF_EL2 register allows an
> offset to be applied to the physical counter, as viewed from EL1 and
> EL0, and to the EL1 physical timer. The functionality of this 64-bit
> register is affected by CNTHCTL_EL2.ECV."
>
> As per ARM ARM (ARM DDI 0487J.a page D19-7857)
> "When HCR_EL2.{E2H, TGE} == {1, 1} or SCR_EL3.{NS, EEL2} == {0, 0}, then
> Enhanced Counter Virtualization functionality is disabled."
>
> "The EL1 physical timer interrupt is triggered when ((PCount<63:0> -
> CNTPOFF_EL2<63:0>) - PCVal<63:0>) is greater than or equal to 0."
>
> As per ARM ARM (ARM DDI 0487J.a page D19-7938)
> "When EL2 is implemented and enabled in the current Security state,
> the physical counter uses a fixed physical offset of *zero* if any of
> the following are true:
> • CNTHCTL_EL2.ECV is 0.
> • SCR_EL3.ECVEn is 0.
> • HCR_EL2.{E2H, TGE} is {1, 1}."
>
> In VHE host hypervisor, E2H=TGE=1 hence ECV is disabled and Ptimer
> interrupt is triggered based on PCount<63:0> - PCVal<63:0>
>
> Since cval is set by Guest as per offsetted PCounter value and pCount
> is not subtracted by CNTPOFF when in VHE-L0, results in cval becoming
> much lesser than physical counter(bumped up since CNTPOFF is zero) and
> timer interrupt trigger condition is met falsely.
>
> There is no issue/impact on cval due to ECV, however it can be/is
> manipulated to handle this on and off of CNTPOFF/ECV.
>
> IIUC, CNTPOFF and CNTVOFF are not same as per specification.

I owe you an apology. You are correct, and I was totally wrong. I'm
truly amazed how wrong we got this part of the architecture, but it is
way too late for any change, and we'll have to live with it.

Now, to the actual patch: I think the way you offset CVAL isn't
great. You should never have to change it on entry, and you should
instead read the correct value from memory. Then, save/restore of CVAL
must be amended to always apply the offset. Can you give the hack
below a go on your HW?

Thanks,

M.

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index ea46b4e1e7a8..bb80fdd84676 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -55,11 +55,6 @@ static struct irq_ops arch_timer_irq_ops = {
.get_input_level = kvm_arch_timer_get_input_level,
};

-static bool has_cntpoff(void)
-{
- return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
-}
-
static int nr_timers(struct kvm_vcpu *vcpu)
{
if (!vcpu_has_nv2(vcpu))
@@ -180,7 +175,7 @@ u64 kvm_phys_timer_read(void)
return timecounter->cc->read(timecounter->cc);
}

-static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
+void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
{
if (vcpu_has_nv2(vcpu)) {
if (is_hyp_ctxt(vcpu)) {
@@ -569,8 +564,7 @@ static void timer_save_state(struct arch_timer_context *ctx)
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
cval = read_sysreg_el0(SYS_CNTP_CVAL);

- if (!has_cntpoff())
- cval -= timer_get_offset(ctx);
+ cval -= timer_get_offset(ctx);

timer_set_cval(ctx, cval);

@@ -657,8 +651,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
cval = timer_get_cval(ctx);
offset = timer_get_offset(ctx);
set_cntpoff(offset);
- if (!has_cntpoff())
- cval += offset;
+ cval += offset;
write_sysreg_el0(cval, SYS_CNTP_CVAL);
isb();
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 9611b4eaf661..6e3d3e16563f 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -90,6 +90,20 @@ static void __activate_traps(struct kvm_vcpu *vcpu)

___activate_traps(vcpu, hcr);

+ if (has_cntpoff()) {
+ struct timer_map map;
+
+ get_timer_map(vcpu, &map);
+
+ if (map.direct_ptimer == vcpu_ptimer(vcpu))
+ val = __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
+ else
+ val = __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
+
+ isb();
+ write_sysreg_s(val, SYS_CNTP_CVAL_EL0);
+ }
+
val = read_sysreg(cpacr_el1);
val |= CPACR_ELx_TTA;
val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
@@ -131,6 +145,23 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)

write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);

+ if (has_cntpoff()) {
+ struct timer_map map;
+ u64 val, offset;
+
+ get_timer_map(vcpu, &map);
+
+ val = read_sysreg_s(SYS_CNTP_CVAL_EL0);
+ if (map.direct_ptimer == vcpu_ptimer(vcpu))
+ __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val;
+ else
+ __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val;
+
+ offset = read_sysreg_s(SYS_CNTPOFF_EL2);
+ write_sysreg_s(val + offset, SYS_CNTP_CVAL_EL0);
+ isb();
+ }
+
/*
* ARM errata 1165522 and 1530923 require the actual execution of the
* above before we can switch to the EL2/EL0 translation regime used by
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index ea77a569a907..86a73ad1446a 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -82,6 +82,8 @@ struct timer_map {
struct arch_timer_context *emul_ptimer;
};

+void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map);
+
struct arch_timer_cpu {
struct arch_timer_context timers[NR_KVM_TIMERS];

@@ -149,4 +151,9 @@ void kvm_timer_cpu_down(void);
/* CNTKCTL_EL1 valid bits as of DDI0476J.a */
#define CNTKCTL_VALID_BITS (BIT(17) | GENMASK_ULL(9, 0))

+static inline bool has_cntpoff(void)
+{
+ return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));
+}
+
#endif

--
Without deviation from the norm, progress is not possible.