[PATCH v2 3/4] x86/paravirt: switch mixed paravirt/alternative calls to alternative_2

From: Juergen Gross
Date: Mon Oct 16 2023 - 08:40:09 EST


Instead of stacking alternative and paravirt patching, use the new
ALT_FLAG_CALL flag to switch those mixed calls to pure alternative
handling.

This eliminates the need to be careful regarding the sequence of
alternative and paravirt patching.

For call depth tracking callthunks_setup() needs to be adapted to patch
calls at alternative patching sites instead of paravirt calls.

Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
---
arch/x86/include/asm/alternative.h | 5 +--
arch/x86/include/asm/paravirt.h | 49 +++++++++++++--------------
arch/x86/include/asm/paravirt_types.h | 29 +++++++---------
arch/x86/kernel/callthunks.c | 17 +++++-----
arch/x86/kernel/module.c | 20 +++--------
5 files changed, 51 insertions(+), 69 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index dd63b96577e9..f6c0ff678e2e 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -89,6 +89,8 @@ struct alt_instr {
u8 replacementlen; /* length of new instruction */
} __packed;

+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
/*
* Debug flag that can be tested to see whether alternative
* instructions were patched in already:
@@ -104,11 +106,10 @@ extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine,
s32 *start_cfi, s32 *end_cfi);

struct module;
-struct paravirt_patch_site;

struct callthunk_sites {
s32 *call_start, *call_end;
- struct paravirt_patch_site *pv_start, *pv_end;
+ struct alt_instr *alt_start, *alt_end;
};

#ifdef CONFIG_CALL_THUNKS
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ed5c7342f2ef..9c6c5cfa9fe2 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -142,8 +142,7 @@ static inline void write_cr0(unsigned long x)
static __always_inline unsigned long read_cr2(void)
{
return PVOP_ALT_CALLEE0(unsigned long, mmu.read_cr2,
- "mov %%cr2, %%rax;",
- ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%cr2, %%rax;", ALT_NOT_XEN);
}

static __always_inline void write_cr2(unsigned long x)
@@ -154,13 +153,12 @@ static __always_inline void write_cr2(unsigned long x)
static inline unsigned long __read_cr3(void)
{
return PVOP_ALT_CALL0(unsigned long, mmu.read_cr3,
- "mov %%cr3, %%rax;", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%cr3, %%rax;", ALT_NOT_XEN);
}

static inline void write_cr3(unsigned long x)
{
- PVOP_ALT_VCALL1(mmu.write_cr3, x,
- "mov %%rdi, %%cr3", ALT_NOT(X86_FEATURE_XENPV));
+ PVOP_ALT_VCALL1(mmu.write_cr3, x, "mov %%rdi, %%cr3", ALT_NOT_XEN);
}

static inline void __write_cr4(unsigned long x)
@@ -182,7 +180,7 @@ extern noinstr void pv_native_wbinvd(void);

static __always_inline void wbinvd(void)
{
- PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV));
+ PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT_XEN);
}

static inline u64 paravirt_read_msr(unsigned msr)
@@ -390,27 +388,25 @@ static inline void paravirt_release_p4d(unsigned long pfn)
static inline pte_t __pte(pteval_t val)
{
return (pte_t) { PVOP_ALT_CALLEE1(pteval_t, mmu.make_pte, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV)) };
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
}

static inline pteval_t pte_val(pte_t pte)
{
return PVOP_ALT_CALLEE1(pteval_t, mmu.pte_val, pte.pte,
- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}

static inline pgd_t __pgd(pgdval_t val)
{
return (pgd_t) { PVOP_ALT_CALLEE1(pgdval_t, mmu.make_pgd, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV)) };
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
}

static inline pgdval_t pgd_val(pgd_t pgd)
{
return PVOP_ALT_CALLEE1(pgdval_t, mmu.pgd_val, pgd.pgd,
- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}

#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -444,14 +440,13 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
static inline pmd_t __pmd(pmdval_t val)
{
return (pmd_t) { PVOP_ALT_CALLEE1(pmdval_t, mmu.make_pmd, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV)) };
+ "mov %%rdi, %%rax", ALT_NOT_XEN) };
}

static inline pmdval_t pmd_val(pmd_t pmd)
{
return PVOP_ALT_CALLEE1(pmdval_t, mmu.pmd_val, pmd.pmd,
- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}

static inline void set_pud(pud_t *pudp, pud_t pud)
@@ -464,7 +459,7 @@ static inline pud_t __pud(pudval_t val)
pudval_t ret;

ret = PVOP_ALT_CALLEE1(pudval_t, mmu.make_pud, val,
- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);

return (pud_t) { ret };
}
@@ -472,7 +467,7 @@ static inline pud_t __pud(pudval_t val)
static inline pudval_t pud_val(pud_t pud)
{
return PVOP_ALT_CALLEE1(pudval_t, mmu.pud_val, pud.pud,
- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}

static inline void pud_clear(pud_t *pudp)
@@ -492,8 +487,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
static inline p4d_t __p4d(p4dval_t val)
{
p4dval_t ret = PVOP_ALT_CALLEE1(p4dval_t, mmu.make_p4d, val,
- "mov %%rdi, %%rax",
- ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);

return (p4d_t) { ret };
}
@@ -501,7 +495,7 @@ static inline p4d_t __p4d(p4dval_t val)
static inline p4dval_t p4d_val(p4d_t p4d)
{
return PVOP_ALT_CALLEE1(p4dval_t, mmu.p4d_val, p4d.p4d,
- "mov %%rdi, %%rax", ALT_NOT(X86_FEATURE_XENPV));
+ "mov %%rdi, %%rax", ALT_NOT_XEN);
}

static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -687,17 +681,17 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
static __always_inline unsigned long arch_local_save_flags(void)
{
return PVOP_ALT_CALLEE0(unsigned long, irq.save_fl, "pushf; pop %%rax;",
- ALT_NOT(X86_FEATURE_XENPV));
+ ALT_NOT_XEN);
}

static __always_inline void arch_local_irq_disable(void)
{
- PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT(X86_FEATURE_XENPV));
+ PVOP_ALT_VCALLEE0(irq.irq_disable, "cli;", ALT_NOT_XEN);
}

static __always_inline void arch_local_irq_enable(void)
{
- PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT(X86_FEATURE_XENPV));
+ PVOP_ALT_VCALLEE0(irq.irq_enable, "sti;", ALT_NOT_XEN);
}

static __always_inline unsigned long arch_local_irq_save(void)
@@ -746,20 +740,23 @@ void native_pv_lock_init(void) __init;

#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT_XXL
+#ifdef CONFIG_DEBUG_ENTRY

#define PARA_PATCH(off) ((off) / 8)
#define PARA_SITE(ptype, ops) _PVSITE(ptype, ops, .quad, 8)
#define PARA_INDIRECT(addr) *addr(%rip)

-#ifdef CONFIG_DEBUG_ENTRY
.macro PARA_IRQ_save_fl
PARA_SITE(PARA_PATCH(PV_IRQ_save_fl),
ANNOTATE_RETPOLINE_SAFE;
call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);)
+ ANNOTATE_RETPOLINE_SAFE;
+ call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl);
.endm

-#define SAVE_FLAGS ALTERNATIVE "PARA_IRQ_save_fl;", "pushf; pop %rax;", \
- ALT_NOT(X86_FEATURE_XENPV)
+#define SAVE_FLAGS ALTERNATIVE_2 "PARA_IRQ_save_fl;", \
+ ALT_CALL_INSTR, ALT_CALL_ALWAYS, \
+ "pushf; pop %rax;", ALT_NOT_XEN
#endif
#endif /* CONFIG_PARAVIRT_XXL */
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9f84dc074f88..323dca625eea 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -278,15 +278,11 @@ extern struct paravirt_patch_template pv_ops;
#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"

unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, unsigned int len);
+#define paravirt_ptr(op) [paravirt_opptr] "m" (pv_ops.op)

int paravirt_disable_iospace(void);

-/*
- * This generates an indirect call based on the operation type number.
- * The type number, computed in PARAVIRT_PATCH, is derived from the
- * offset into the paravirt_patch_template structure, and can therefore be
- * freely converted back into a structure offset.
- */
+/* This generates an indirect call based on the operation type number. */
#define PARAVIRT_CALL \
ANNOTATE_RETPOLINE_SAFE \
"call *%[paravirt_opptr];"
@@ -319,12 +315,6 @@ int paravirt_disable_iospace(void);
* However, x86_64 also has to clobber all caller saved registers, which
* unfortunately, are quite a bit (r8 - r11)
*
- * The call instruction itself is marked by placing its start address
- * and size into the .parainstructions section, so that
- * apply_paravirt() in arch/i386/kernel/alternative.c can do the
- * appropriate patching under the control of the backend pv_init_ops
- * implementation.
- *
* Unfortunately there's no way to get gcc to generate the args setup
* for the call, and then allow the call itself to be generated by an
* inline asm. Because of this, we must do the complete arg setup and
@@ -428,9 +418,10 @@ int paravirt_disable_iospace(void);
({ \
PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
- asm volatile(paravirt_alt(PARAVIRT_CALL) \
+ asm volatile(ALTERNATIVE(PARAVIRT_CALL, ALT_CALL_INSTR, \
+ ALT_CALL_ALWAYS) \
: call_clbr, ASM_CALL_CONSTRAINT \
- : paravirt_type(op), \
+ : paravirt_ptr(op), \
##__VA_ARGS__ \
: "memory", "cc" extra_clbr); \
ret; \
@@ -441,10 +432,11 @@ int paravirt_disable_iospace(void);
({ \
PVOP_CALL_ARGS; \
PVOP_TEST_NULL(op); \
- asm volatile(ALTERNATIVE(paravirt_alt(PARAVIRT_CALL), \
- alt, cond) \
+ asm volatile(ALTERNATIVE_2(PARAVIRT_CALL, \
+ ALT_CALL_INSTR, ALT_CALL_ALWAYS, \
+ alt, cond) \
: call_clbr, ASM_CALL_CONSTRAINT \
- : paravirt_type(op), \
+ : paravirt_ptr(op), \
##__VA_ARGS__ \
: "memory", "cc" extra_clbr); \
ret; \
@@ -557,5 +549,8 @@ extern struct paravirt_patch_site __parainstructions[],
__parainstructions_end[];

#endif /* __ASSEMBLY__ */
+
+#define ALT_NOT_XEN ALT_NOT(X86_FEATURE_XENPV)
+
#endif /* CONFIG_PARAVIRT */
#endif /* _ASM_X86_PARAVIRT_TYPES_H */
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index faa9f2299848..200ea8087ddb 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -238,14 +238,13 @@ patch_call_sites(s32 *start, s32 *end, const struct core_text *ct)
}

static __init_or_module void
-patch_paravirt_call_sites(struct paravirt_patch_site *start,
- struct paravirt_patch_site *end,
- const struct core_text *ct)
+patch_alt_call_sites(struct alt_instr *start, struct alt_instr *end,
+ const struct core_text *ct)
{
- struct paravirt_patch_site *p;
+ struct alt_instr *a;

- for (p = start; p < end; p++)
- patch_call(p->instr, ct);
+ for (a = start; a < end; a++)
+ patch_call((u8 *)&a->instr_offset + a->instr_offset, ct);
}

static __init_or_module void
@@ -253,7 +252,7 @@ callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct)
{
prdbg("Patching call sites %s\n", ct->name);
patch_call_sites(cs->call_start, cs->call_end, ct);
- patch_paravirt_call_sites(cs->pv_start, cs->pv_end, ct);
+ patch_alt_call_sites(cs->alt_start, cs->alt_end, ct);
prdbg("Patching call sites done%s\n", ct->name);
}

@@ -262,8 +261,8 @@ void __init callthunks_patch_builtin_calls(void)
struct callthunk_sites cs = {
.call_start = __call_sites,
.call_end = __call_sites_end,
- .pv_start = __parainstructions,
- .pv_end = __parainstructions_end
+ .alt_start = __alt_instructions,
+ .alt_end = __alt_instructions_end
};

if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 5f71a0cf4399..e18914c0e38a 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -276,7 +276,7 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me)
{
const Elf_Shdr *s, *alt = NULL, *locks = NULL,
- *para = NULL, *orc = NULL, *orc_ip = NULL,
+ *orc = NULL, *orc_ip = NULL,
*retpolines = NULL, *returns = NULL, *ibt_endbr = NULL,
*calls = NULL, *cfi = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
@@ -286,8 +286,6 @@ int module_finalize(const Elf_Ehdr *hdr,
alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name))
locks = s;
- if (!strcmp(".parainstructions", secstrings + s->sh_name))
- para = s;
if (!strcmp(".orc_unwind", secstrings + s->sh_name))
orc = s;
if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
@@ -304,14 +302,6 @@ int module_finalize(const Elf_Ehdr *hdr,
ibt_endbr = s;
}

- /*
- * See alternative_instructions() for the ordering rules between the
- * various patching types.
- */
- if (para) {
- void *pseg = (void *)para->sh_addr;
- apply_paravirt(pseg, pseg + para->sh_size);
- }
if (retpolines || cfi) {
void *rseg = NULL, *cseg = NULL;
unsigned int rsize = 0, csize = 0;
@@ -341,7 +331,7 @@ int module_finalize(const Elf_Ehdr *hdr,
void *aseg = (void *)alt->sh_addr;
apply_alternatives(aseg, aseg + alt->sh_size);
}
- if (calls || para) {
+ if (calls || alt) {
struct callthunk_sites cs = {};

if (calls) {
@@ -349,9 +339,9 @@ int module_finalize(const Elf_Ehdr *hdr,
cs.call_end = (void *)calls->sh_addr + calls->sh_size;
}

- if (para) {
- cs.pv_start = (void *)para->sh_addr;
- cs.pv_end = (void *)para->sh_addr + para->sh_size;
+ if (alt) {
+ cs.alt_start = (void *)alt->sh_addr;
+ cs.alt_end = (void *)alt->sh_addr + alt->sh_size;
}

callthunks_patch_module_calls(&cs, me);
--
2.35.3