Re: [RFC] x86/kvm/emulate: Avoid RET for fastops

From: Peter Zijlstra
Date: Wed Nov 29 2023 - 10:05:15 EST


On Tue, Nov 28, 2023 at 05:37:52PM -0800, Sean Christopherson wrote:
> On Sun, Nov 12, 2023, Peter Zijlstra wrote:
> > Hi,
> >
> > Inspired by the likes of ba5ca5e5e6a1 ("x86/retpoline: Don't clobber
> > RFLAGS during srso_safe_ret()") I had it on my TODO to look at this,
> > because the call-depth-tracking rethunk definitely also clobbers flags
> > and that's a ton harder to fix.
> >
> > Looking at this recently I noticed that there's really only one callsite
> > (twice, the testcc thing is basically separate from the rest of the
> > fastop stuff) and thus CALL+RET is totally silly, we can JMP+JMP.
> >
> > The below implements this, and aside from objtool going apeshit (it
> > fails to recognise the fastop JMP_NOSPEC as a jump-table and instead
> > classifies it as a tail-call), it actually builds and the asm looks
> > good sensible enough.
> >
> > I've not yet figured out how to test this stuff, but does something like
> > this look sane to you guys?
>
> Yes? The idea seems sound, but I haven't thought _that_ hard about whether or not
> there's any possible gotchas. I did a quick test and nothing exploded (and
> usually when this code breaks, it breaks spectacularly).

Looking at this more, I was wondering if there is something magical
about test_cc(). Both naming and purpose seems to be testing the flags,
but it has a side effect of actually setting the flags too, does
anything rely on that?

That is, we already have code that emulates the condition-codes, might
as well use it here and avoid a bunch of dodgy asm, no?

---
arch/x86/include/asm/text-patching.h | 20 +++++++++++++-------
arch/x86/kvm/emulate.c | 34 ++--------------------------------
2 files changed, 15 insertions(+), 39 deletions(-)

diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 29832c338cdc..7b9d7fe0ab64 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -186,9 +186,9 @@ void int3_emulate_ret(struct pt_regs *regs)
}

static __always_inline
-void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
+bool __emulate_cc(unsigned long flags, u8 cc)
{
- static const unsigned long jcc_mask[6] = {
+ static const unsigned long cc_mask[6] = {
[0] = X86_EFLAGS_OF,
[1] = X86_EFLAGS_CF,
[2] = X86_EFLAGS_ZF,
@@ -201,15 +201,21 @@ void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned lo
bool match;

if (cc < 0xc) {
- match = regs->flags & jcc_mask[cc >> 1];
+ match = flags & cc_mask[cc >> 1];
} else {
- match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
- ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
+ match = ((flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
+ ((flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
if (cc >= 0xe)
- match = match || (regs->flags & X86_EFLAGS_ZF);
+ match = match || (flags & X86_EFLAGS_ZF);
}

- if ((match && !invert) || (!match && invert))
+ return (match && !invert) || (!match && invert);
+}
+
+static __always_inline
+void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
+{
+ if (__emulate_cc(regs->flags, cc))
ip += disp;

int3_emulate_jmp(regs, ip);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2673cd5c46cb..0e971222c1f4 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -26,6 +26,7 @@
#include <asm/debugreg.h>
#include <asm/nospec-branch.h>
#include <asm/ibt.h>
+#include <asm/text-patching.h>

#include "x86.h"
#include "tss.h"
@@ -416,31 +417,6 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
ON64(FOP3E(op##q, rax, rdx, cl)) \
FOP_END

-/* Special case for SETcc - 1 instruction per cc */
-#define FOP_SETCC(op) \
- FOP_FUNC(op) \
- #op " %al \n\t" \
- FOP_RET(op)
-
-FOP_START(setcc)
-FOP_SETCC(seto)
-FOP_SETCC(setno)
-FOP_SETCC(setc)
-FOP_SETCC(setnc)
-FOP_SETCC(setz)
-FOP_SETCC(setnz)
-FOP_SETCC(setbe)
-FOP_SETCC(setnbe)
-FOP_SETCC(sets)
-FOP_SETCC(setns)
-FOP_SETCC(setp)
-FOP_SETCC(setnp)
-FOP_SETCC(setl)
-FOP_SETCC(setnl)
-FOP_SETCC(setle)
-FOP_SETCC(setnle)
-FOP_END;
-
FOP_START(salc)
FOP_FUNC(salc)
"pushf; sbb %al, %al; popf \n\t"
@@ -1063,13 +1039,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt)

static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
- u8 rc;
- void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf);
-
- flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
- asm("push %[flags]; popf; " CALL_NOSPEC
- : "=a"(rc) : [thunk_target]"r"(fop), [flags]"r"(flags));
- return rc;
+ return __emulate_cc(flags, condition & 0xf);
}

static void fetch_register_operand(struct operand *op)