Re: [PATCH 3/3] x86/xen: Move paravirt IOPL switching to slow the path

From: Juergen Gross
Date: Wed Jun 14 2017 - 09:14:26 EST


On 14/06/17 14:40, Brian Gerst wrote:
> Since tasks using IOPL are very rare, move the switching code to the slow
> path for lower impact on normal tasks.
>
> Signed-off-by: Brian Gerst <brgerst@xxxxxxxxx>
> ---
> arch/x86/include/asm/paravirt.h | 6 ++++++
> arch/x86/include/asm/processor.h | 1 +
> arch/x86/include/asm/thread_info.h | 5 ++++-
> arch/x86/include/asm/xen/hypervisor.h | 2 --
> arch/x86/kernel/ioport.c | 6 ++++++
> arch/x86/kernel/process.c | 8 ++++++++
> arch/x86/kernel/process_32.c | 9 ---------
> arch/x86/kernel/process_64.c | 11 -----------
> arch/x86/xen/enlighten_pv.c | 2 +-
> 9 files changed, 26 insertions(+), 24 deletions(-)
>
> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
> index 9a15739..2145dbd 100644
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -265,6 +265,12 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
> {
> PVOP_VCALL3(pv_cpu_ops.write_idt_entry, dt, entry, g);
> }
> +
> +static inline bool paravirt_iopl(void)
> +{
> + return pv_cpu_ops.set_iopl_mask != paravirt_nop;
> +}
> +
> static inline void set_iopl_mask(unsigned mask)
> {
> PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 06c4795..4411d67 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -523,6 +523,7 @@ static inline void load_sp0(struct tss_struct *tss,
> native_load_sp0(tss, thread);
> }
>
> +static inline bool paravirt_iopl(void) { return false; }
> static inline void set_iopl_mask(unsigned mask) { }
> #endif /* CONFIG_PARAVIRT */
>
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index e00e1bd..350f3d3 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -79,6 +79,7 @@ struct thread_info {
> #define TIF_SIGPENDING 2 /* signal pending */
> #define TIF_NEED_RESCHED 3 /* rescheduling necessary */
> #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
> +#define TIF_PV_IOPL 5 /* call hypervisor to change IOPL */
> #define TIF_SYSCALL_EMU 6 /* syscall emulation active */
> #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
> #define TIF_SECCOMP 8 /* secure computing */
> @@ -104,6 +105,7 @@ struct thread_info {
> #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
> #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
> #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
> +#define _TIF_PV_IOPL (1 << TIF_PV_IOPL)
> #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
> #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
> #define _TIF_SECCOMP (1 << TIF_SECCOMP)
> @@ -141,7 +143,8 @@ struct thread_info {
>
> /* flags to check in __switch_to() */
> #define _TIF_WORK_CTXSW \
> - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
> + (_TIF_IO_BITMAP | _TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
> + _TIF_PV_IOPL)
>
> #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
> #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
> diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
> index 39171b3..8b2d4be 100644
> --- a/arch/x86/include/asm/xen/hypervisor.h
> +++ b/arch/x86/include/asm/xen/hypervisor.h
> @@ -62,6 +62,4 @@ void xen_arch_register_cpu(int num);
> void xen_arch_unregister_cpu(int num);
> #endif
>
> -extern void xen_set_iopl_mask(unsigned mask);
> -
> #endif /* _ASM_X86_XEN_HYPERVISOR_H */
> diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
> index 9c3cf09..2051e7d 100644
> --- a/arch/x86/kernel/ioport.c
> +++ b/arch/x86/kernel/ioport.c
> @@ -126,6 +126,12 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
> regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
> (level << X86_EFLAGS_IOPL_BIT);
> t->iopl = level << X86_EFLAGS_IOPL_BIT;
> + if (paravirt_iopl()) {
> + if (level > 0)
> + set_thread_flag(TIF_PV_IOPL);
> + else
> + clear_thread_flag(TIF_PV_IOPL);
> + }

You could get rid of paravirt_iopl() by adding a "setflag" boolean
parameter to set_iopl_mask(). Only the Xen variant would need above
thread flag manipulation.


Juergen

> set_iopl_mask(t->iopl);
>
> return 0;
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 0bb88428c..78d1ab2 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -296,6 +296,14 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
>
> if ((tifp ^ tifn) & _TIF_NOCPUID)
> set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
> +
> + /*
> + * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
> + * current_pt_regs()->flags may not match the current task's
> + * intended IOPL. We need to switch it manually.
> + */
> + if (prev->iopl != next->iopl)
> + set_iopl_mask(next->iopl);
> }
>
> /*
> diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
> index b2d1f7c..19527b4 100644
> --- a/arch/x86/kernel/process_32.c
> +++ b/arch/x86/kernel/process_32.c
> @@ -258,15 +258,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
> load_TLS(next, cpu);
>
> /*
> - * Restore IOPL if needed. In normal use, the flags restore
> - * in the switch assembly will handle this. But if the kernel
> - * is running virtualized at a non-zero CPL, the popf will
> - * not restore flags, so it must be done in a separate step.
> - */
> - if (unlikely(prev->iopl != next->iopl))
> - set_iopl_mask(next->iopl);
> -
> - /*
> * Now maybe handle debug registers and/or IO bitmaps
> */
> if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index 9c39ab8..9525e10 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -446,17 +446,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
> task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
> __switch_to_xtra(prev_p, next_p, tss);
>
> -#ifdef CONFIG_XEN_PV
> - /*
> - * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
> - * current_pt_regs()->flags may not match the current task's
> - * intended IOPL. We need to switch it manually.
> - */
> - if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
> - prev->iopl != next->iopl))
> - xen_set_iopl_mask(next->iopl);
> -#endif
> -
> if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
> /*
> * AMD CPUs have a misfeature: SYSRET sets the SS selector but
> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
> index 05257c0..ea0d269 100644
> --- a/arch/x86/xen/enlighten_pv.c
> +++ b/arch/x86/xen/enlighten_pv.c
> @@ -813,7 +813,7 @@ static void xen_load_sp0(struct tss_struct *tss,
> tss->x86_tss.sp0 = thread->sp0;
> }
>
> -void xen_set_iopl_mask(unsigned mask)
> +static void xen_set_iopl_mask(unsigned mask)
> {
> struct physdev_set_iopl set_iopl;
>
>