Re: [Qemu-devel] [Patch V2 1/2] x86, mce: Basic support to add LMCE support to QEMU

From: Eduardo Habkost
Date: Mon Dec 14 2015 - 11:29:34 EST


Hi,

Comments below:

On Thu, Dec 10, 2015 at 02:41:21PM -0500, Ashok Raj wrote:
> This patch adds basic enumeration, control msr's required to support
> Local Machine Check Exception Support (LMCE).
>
> - Added Local Machine Check definitions, changed MCG_CAP
> - Added support for IA32_FEATURE_CONTROL.
> - When delivering MCE to guest, we deliver to just a single CPU
> when guest OS has opted in to Local delivery.
>
> Signed-off-by: Ashok Raj <ashok.raj@xxxxxxxxx>
> Tested-by: Gong Chen <gong.chen@xxxxxxxxx>
> ---
> Resending with proper commit message for second patch
>
> target-i386/cpu.c | 8 ++++++++
> target-i386/cpu.h | 8 ++++++--
> target-i386/kvm.c | 38 +++++++++++++++++++++++++++++++-------
> 3 files changed, 45 insertions(+), 9 deletions(-)
>
> diff --git a/target-i386/cpu.c b/target-i386/cpu.c
> index 11e5e39..167669a 100644
> --- a/target-i386/cpu.c
> +++ b/target-i386/cpu.c
> @@ -2737,6 +2737,13 @@ static void mce_init(X86CPU *cpu)
> }
> }
>
> +static void feature_control_init(X86CPU *cpu)
> +{
> + CPUX86State *cenv = &cpu->env;
> +
> + cenv->msr_ia32_feature_control = ((1<<20) | (1<<0));

FEATURE_CONTROL_LOCKED and FEATURE_CONTROL_LMCE macros would make
the code clearer.

> +}
> +
> #ifndef CONFIG_USER_ONLY
> static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
> {
> @@ -2858,6 +2865,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
> #endif
>
> mce_init(cpu);
> + feature_control_init(cpu);
>
> #ifndef CONFIG_USER_ONLY
> if (tcg_enabled()) {
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 84edfd0..a567d7a 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -282,8 +282,9 @@
>
> #define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */
> #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
> +#define MCG_LMCE_P (1ULL<<27) /* Local Machine Check Supported */

Please use spaces instead of tabs. You can detect this and other
coding style issues in this patch with checkpatch.pl.


>
> -#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P)
> +#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P|MCG_LMCE_P)

This makes mcg_cap change when upgrading QEMU.

VMs with MCG_LMCE_P enabled shouldn't be migratable to hosts
running older kernels, or the guest may try to read or write
MSR_IA32_MCG_EXT_CTL after miration and get a #GP. That means:

1) Older machine-types (pc-2.5 and older) should keep the
old (MCG_CTL_P|MCG_SER_P) default.
2) We can't make pc-2.6 enable LMCE by default, either, because
QEMU guarantees that just changing the machine-type shouldn't
introduce new host requirements (see:
http://article.gmane.org/gmane.comp.emulators.qemu/346651)

It looks like we need a new -cpu option to enable the feature,
then. At least until we raise the minimum kernel version
requirements of QEMU.

(I didn't review the kvm_mce_inject() changes as I am not
familiar with the details of how LMCE is implemented.)


> #define MCE_BANKS_DEF 10
>
> #define MCG_CAP_BANKS_MASK 0xff
> @@ -291,6 +292,7 @@
> #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
> #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
> #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
> +#define MCG_STATUS_LMCE (1ULL<<3) /* Local MCE signaled */
>
> #define MCI_STATUS_VAL (1ULL<<63) /* valid error */
> #define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
> @@ -333,6 +335,7 @@
> #define MSR_MCG_CAP 0x179
> #define MSR_MCG_STATUS 0x17a
> #define MSR_MCG_CTL 0x17b
> +#define MSR_MCG_EXT_CTL 0x4d0
>
> #define MSR_P6_EVNTSEL0 0x186
>
> @@ -892,7 +895,6 @@ typedef struct CPUX86State {
>
> uint64_t mcg_status;
> uint64_t msr_ia32_misc_enable;
> - uint64_t msr_ia32_feature_control;
>
> uint64_t msr_fixed_ctr_ctrl;
> uint64_t msr_global_ctrl;
> @@ -977,8 +979,10 @@ typedef struct CPUX86State {
> int64_t tsc_khz;
> void *kvm_xsave_buf;
>
> + uint64_t msr_ia32_feature_control;
> uint64_t mcg_cap;
> uint64_t mcg_ctl;
> + uint64_t mcg_ext_ctl;
> uint64_t mce_banks[MCE_BANKS_DEF*4];
>
> uint64_t tsc_aux;
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 6dc9846..c61fe1f 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -72,6 +72,7 @@ static bool has_msr_tsc_aux;
> static bool has_msr_tsc_adjust;
> static bool has_msr_tsc_deadline;
> static bool has_msr_feature_control;
> +static bool has_msr_ext_mcg_ctl;
> static bool has_msr_async_pf_en;
> static bool has_msr_pv_eoi_en;
> static bool has_msr_misc_enable;
> @@ -370,18 +371,30 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
> uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
> MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S;
> uint64_t mcg_status = MCG_STATUS_MCIP;
> + int flags = 0;
> + CPUState *cs = CPU(cpu);
> +
> + /*
> + * We need to read back the value of MSR_EXT_MCG_CTL that was set by the
> + * guest kernel back into Qemu
> + */
> + cpu_synchronize_state(cs);
> +
> + flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0;
>
> if (code == BUS_MCEERR_AR) {
> - status |= MCI_STATUS_AR | 0x134;
> - mcg_status |= MCG_STATUS_EIPV;
> + status |= MCI_STATUS_AR | 0x134;
> + mcg_status |= MCG_STATUS_EIPV;
> + if (env->mcg_ext_ctl & 0x1) {
> + mcg_status |= MCG_STATUS_LMCE;
> + flags = 0; /* No Broadcast when LMCE is opted by guest */
> + }
> } else {
> status |= 0xc0;
> mcg_status |= MCG_STATUS_RIPV;
> }
> cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr,
> - (MCM_ADDR_PHYS << 6) | 0xc,
> - cpu_x86_support_mca_broadcast(env) ?
> - MCE_INJECT_BROADCAST : 0);
> + (MCM_ADDR_PHYS << 6) | 0xc, flags);
> }
>
> static void hardware_memory_error(void)
> @@ -808,10 +821,14 @@ int kvm_arch_init_vcpu(CPUState *cs)
>
> c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0);
> if (c) {
> - has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) ||
> - !!(c->ecx & CPUID_EXT_SMX);
> + has_msr_feature_control = !!((c->ecx & CPUID_EXT_VMX) ||
> + !!(c->ecx & CPUID_EXT_SMX) ||
> + !!(env->mcg_cap & MCG_LMCE_P));
> }
>
> + if (has_msr_feature_control && (env->mcg_cap & MCG_LMCE_P))
> + has_msr_ext_mcg_ctl = true;
> +
> c = cpuid_find_entry(&cpuid_data.cpuid, 0x80000007, 0);
> if (c && (c->edx & 1<<8) && invtsc_mig_blocker == NULL) {
> /* for migration */
> @@ -1557,6 +1574,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
>
> kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
> kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
> + kvm_msr_entry_set(&msrs[n++], MSR_MCG_EXT_CTL, env->mcg_ext_ctl);
> for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
> kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
> }
> @@ -1811,6 +1829,9 @@ static int kvm_get_msrs(X86CPU *cpu)
> if (has_msr_feature_control) {
> msrs[n++].index = MSR_IA32_FEATURE_CONTROL;
> }
> + if (has_msr_ext_mcg_ctl) {
> + msrs[n++].index = MSR_MCG_EXT_CTL;
> + }
> if (has_msr_bndcfgs) {
> msrs[n++].index = MSR_IA32_BNDCFGS;
> }
> @@ -1981,6 +2002,9 @@ static int kvm_get_msrs(X86CPU *cpu)
> case MSR_IA32_FEATURE_CONTROL:
> env->msr_ia32_feature_control = msrs[i].data;
> break;
> + case MSR_MCG_EXT_CTL:
> + env->mcg_ext_ctl = msrs[i].data;
> + break;
> case MSR_IA32_BNDCFGS:
> env->msr_bndcfgs = msrs[i].data;
> break;
> --
> 2.4.3
>
>

--
Eduardo
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/