Re: [PATCH] clocksource: Add Marvell Errata-38627 workaround

From: Marc Zyngier
Date: Mon Jul 05 2021 - 05:27:01 EST


On Mon, 05 Jul 2021 07:08:43 +0100,
Bharat Bhushan <bbhushan2@xxxxxxxxxxx> wrote:
>
> CPU pipeline have unpredicted behavior when timer
> interrupt appears and then disappears prior to the
> exception happening.

What kind of unpredictable behaviours? What happens if a guest isn't
aware of the erratum or actively tries to trigger it?

> Time interrupt appears on timer
> expiry and disappears when timer programming or timer
> disable. This typically can happen when a load
> instruction misses in the cache, which can take
> few hundreds of cycles, and an interrupt appears
> after the load instruction starts executing but
> disappears before the load instruction completes.
>
> Workaround of this is to ensure maximum 2us of time

maximum? I'm not sure how you can bound this. Or did you mean
*minimum*?

How was this value obtained? What guarantees that it is safe?

> gap between timer interrupt and timer programming
> which can de-assert timer interrupt.

What guarantees do you have on the propagation of the interrupt signal
from the timer to the CPU, given that the GIC is involved in the
middle of it?

>
> Signed-off-by: Linu Cherian <lcherian@xxxxxxxxxxx>
> Signed-off-by: Bharat Bhushan <bbhushan2@xxxxxxxxxxx>
> ---
> arch/arm64/include/asm/cputype.h | 13 +++++++
> arch/arm64/kernel/cpu_errata.c | 24 +++++++++++++
> arch/arm64/tools/cpucaps | 1 +
> drivers/clocksource/Kconfig | 13 +++++++
> drivers/clocksource/arm_arch_timer.c | 54 ++++++++++++++++++++++++++++
> 5 files changed, 105 insertions(+)
>
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index 6231e1f0abe7..e9a76935ee0f 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -81,6 +81,13 @@
> #define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
> #define CAVIUM_CPU_PART_THUNDERX2 0x0AF
>
> +#define MRVL_CPU_PART_OCTEONTX2_98XX 0x0B1
> +#define MRVL_CPU_PART_OCTEONTX2_96XX 0x0B2
> +#define MRVL_CPU_PART_OCTEONTX2_95XX 0x0B3
> +#define MRVL_CPU_PART_OCTEONTX2_LOKI 0x0B4
> +#define MRVL_CPU_PART_OCTEONTX2_95MM 0x0B5
> +#define MRVL_CPU_PART_OCTEONTX2_95O 0x0B6
> +
> #define BRCM_CPU_PART_BRAHMA_B53 0x100
> #define BRCM_CPU_PART_VULCAN 0x516
>
> @@ -117,6 +124,12 @@
> #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
> #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
> #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
> +#define MIDR_MRVL_OCTEONTX2_98XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, MRVL_CPU_PART_OCTEONTX2_98XX)
> +#define MIDR_MRVL_OCTEONTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, MRVL_CPU_PART_OCTEONTX2_96XX)
> +#define MIDR_MRVL_OCTEONTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, MRVL_CPU_PART_OCTEONTX2_95XX)
> +#define MIDR_MRVL_OCTEONTX2_LOKI MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, MRVL_CPU_PART_OCTEONTX2_LOKI)
> +#define MIDR_MRVL_OCTEONTX2_95MM MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, MRVL_CPU_PART_OCTEONTX2_95MM)
> +#define MIDR_MRVL_OCTEONTX2_95O MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, MRVL_CPU_PART_OCTEONTX2_95O)
> #define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53)
> #define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
> #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)

This part should be is a separate patch.

> diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
> index e2c20c036442..363f83adb333 100644
> --- a/arch/arm64/kernel/cpu_errata.c
> +++ b/arch/arm64/kernel/cpu_errata.c
> @@ -339,6 +339,23 @@ static const struct midr_range erratum_1463225[] = {
> {},
> };
> #endif
> +#ifdef CONFIG_MARVELL_ERRATUM_38627
> +static const struct midr_range marvell_erratum_38627_cpus[] = {
> + /* Marvell OcteonTX 2, 95xx all passes */
> + MIDR_ALL_VERSIONS(MIDR_MRVL_OCTEONTX2_95XX),
> + /* Marvell OcteonTX 2, 95MM all passes */
> + MIDR_ALL_VERSIONS(MIDR_MRVL_OCTEONTX2_95MM),
> + /* Marvell OcteonTX 2, LOKI all passes */
> + MIDR_ALL_VERSIONS(MIDR_MRVL_OCTEONTX2_LOKI),
> + /* Marvell OcteonTX 2, 96xx all passes */
> + MIDR_ALL_VERSIONS(MIDR_MRVL_OCTEONTX2_96XX),
> + /* Marvell OcteonTX 2, 98xx pass 1.0 */
> + MIDR_REV(MIDR_MRVL_OCTEONTX2_98XX, 0, 0),
> + /* Marvell OcteonTX 2, 95O pass 1.0 */
> + MIDR_REV(MIDR_MRVL_OCTEONTX2_95O, 0, 0),

Is there any part that is *not* affected?

> + {},
> +};
> +#endif
>
> const struct arm64_cpu_capabilities arm64_errata[] = {
> #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
> @@ -439,6 +456,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
> .capability = ARM64_WORKAROUND_858921,
> ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
> },
> +#endif
> +#ifdef CONFIG_MARVELL_ERRATUM_38627
> + {
> + .desc = "MARVELL erratum 38627",
> + .capability = ARM64_WORKAROUND_MRVL_38627,
> + ERRATA_MIDR_RANGE_LIST(marvell_erratum_38627_cpus),
> + },
> #endif
> {
> .desc = "Spectre-v2",
> diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
> index 49305c2e6dfd..99dbe80445ef 100644
> --- a/arch/arm64/tools/cpucaps
> +++ b/arch/arm64/tools/cpucaps
> @@ -60,6 +60,7 @@ WORKAROUND_CAVIUM_TX2_219_PRFM
> WORKAROUND_CAVIUM_TX2_219_TVM
> WORKAROUND_CLEAN_CACHE
> WORKAROUND_DEVICE_LOAD_ACQUIRE
> +WORKAROUND_MRVL_38627
> WORKAROUND_NVIDIA_CARMEL_CNP
> WORKAROUND_QCOM_FALKOR_E1003
> WORKAROUND_REPEAT_TLBI
> diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
> index eb661b539a3e..83feb502cf02 100644
> --- a/drivers/clocksource/Kconfig
> +++ b/drivers/clocksource/Kconfig
> @@ -309,6 +309,19 @@ config ARM_ARCH_TIMER_EVTSTREAM
> config ARM_ARCH_TIMER_OOL_WORKAROUND
> bool
>
> +config MARVELL_ERRATUM_38627
> + bool "Workaround for Marvell Erratum 38627"
> + default y
> + depends on ARM_ARCH_TIMER && ARM64
> + select ARM_ARCH_TIMER_OOL_WORKAROUND
> + help
> + This option enables a workaround for Marvell Erratum
> + 38627. According to this errata CPU pipeline have
> + unpredicted behavior when timer interrupt appears and
> + then disappears prior to the exception happening.

This wording is actually a lot clearer than the commit message. What
happens if a guest plays with the timer?

> + This Errata workaround is applicable only to some Marvell
> + OcteonTX2 series of processors.
> +
> config FSL_ERRATUM_A008585
> bool "Workaround for Freescale/NXP Erratum A-008585"
> default y
> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> index be6d741d404c..d1c5e2aa2e7c 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -27,6 +27,7 @@
> #include <linux/acpi.h>
> #include <linux/arm-smccc.h>
> #include <linux/ptp_kvm.h>
> +#include <linux/delay.h>
>
> #include <asm/arch_timer.h>
> #include <asm/virt.h>
> @@ -88,6 +89,8 @@ static enum vdso_clock_mode vdso_default = VDSO_CLOCKMODE_NONE;
>
> static cpumask_t evtstrm_available = CPU_MASK_NONE;
> static bool evtstrm_enable __ro_after_init = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM);
> +static __always_inline void set_next_event(const int access, unsigned long evt,
> + struct clock_event_device *clk);
>
> static int __init early_evtstrm_cfg(char *buf)
> {
> @@ -432,6 +435,48 @@ static __maybe_unused int erratum_set_next_event_tval_phys(unsigned long evt,
> return 0;
> }
>
> +#ifdef CONFIG_MARVELL_ERRATUM_38627
> +/* Workaround is to ensure maximum 2us of time gap between timer expiry
> + * and timer programming which can de-assert timer interrupt.
> + * Time calculation below is based on 100MHz as timer frequency is fixed
> + * to 100MHz on all affected parts.
> + */

Please fix the comment style.

> +static __always_inline

Please drop this __always_inline. There is no hard requirement for it,
and you are about to *wait*, so waiting faster is... not that
interesting. The compiler will figure it out.

> +void erratum_38627_set_next_event(const int access, unsigned long evt,
> + struct clock_event_device *clk)
> +{
> + int32_t tval;
> +
> + tval = arch_timer_reg_read(access, ARCH_TIMER_REG_TVAL, clk);
> +
> + /* Timer already expired, wait for (2 - expired time)us */
> + if ((tval > -200) && (tval < 0))
> + udelay(2 + tval/100);
> +
> + /* Timer is about to expire, wait for 2us + time to expire */
> + if (tval >= 0 && tval < 200)
> + udelay(3 + tval/100);

I read 3us here, which contradicts the comment. Also, even if you wait
for an arbitrary amount of time, I still don't see what guarantees you
may have that the interrupt will have actually be signalled.

> +
> + set_next_event(access, evt, clk);
> +}
> +
> +static __maybe_unused
> +int erratum_38627_set_next_event_tval_virt(unsigned long evt,
> + struct clock_event_device *clk)
> +{
> + erratum_38627_set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk);
> + return 0;
> +}
> +
> +static __maybe_unused
> +int erratum_38627_set_next_event_tval_phys(unsigned long evt,
> + struct clock_event_device *clk)
> +{
> + erratum_38627_set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk);
> + return 0;
> +}
> +#endif
> +
> static const struct arch_timer_erratum_workaround ool_workarounds[] = {
> #ifdef CONFIG_FSL_ERRATUM_A008585
> {
> @@ -479,6 +524,15 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = {
> .read_cntvct_el0 = arm64_858921_read_cntvct_el0,
> },
> #endif
> +#ifdef CONFIG_MARVELL_ERRATUM_38627
> + {
> + .match_type = ate_match_local_cap_id,
> + .id = (void *)ARM64_WORKAROUND_MRVL_38627,
> + .desc = "Marvell erratum 38627",
> + .set_next_event_phys = erratum_38627_set_next_event_tval_phys,
> + .set_next_event_virt = erratum_38627_set_next_event_tval_virt,
> + },
> +#endif
> #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1
> {
> .match_type = ate_match_dt,

You need to update Documentation/arm64/silicon-errata.rst.

Overall, this patch raises *a lot* of questions...

M.

--
Without deviation from the norm, progress is not possible.