[RFC PATCH 3/4] x86/asm: Make alternative macro interfaces more clear and consistent

From: Josh Poimboeuf
Date: Thu Aug 31 2017 - 10:13:41 EST


The alternative code has several macros for wrapping inline asm
statements. These macros can be confusing:

- Some of them require the caller to start their positional operands at
'%1' instead of '%0'.

- There are multiple variants of the macros which basically do the same
thing, but their interfaces are subtly different.

- There's an optional ASM_OUTPUT2() macro for grouping output constraint
arguments. The macro is poorly named, as it doesn't clearly describe
its purpose, and it's only needed some of the time (when there's more
than one output constraint), which makes the interface even more
confusing (see the example below).

- There's no corresponding ASM_INPUT2() macro for inputs, so it's not
always intuitive to figure out which arguments are the outputs and
which ones are the inputs.

- The ASM_NO_INPUT_CLOBBER() macro is also confusing unless you know
what it does.
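
For example, a call site with two outputs, no inputs, and a clobber
currently looks like this (taken from atomic64_add_return(), which is
converted below):

  alternative_atomic64(add_return,
                       ASM_OUTPUT2("+A" (i), "+c" (v)),
                       ASM_NO_INPUT_CLOBBER("memory"));

It's hard to tell at a glance which arguments are outputs, which are
inputs, and which are clobbers.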

Make the following changes:

- Give alternative_io(), alternative_call(), and alternative_call_2()
consistent interfaces. Output constraints, input constraints, and
clobbers are now always grouped explicitly with the OUTPUTS(),
INPUTS(), and CLOBBERS() macros (see the example after this list).

- Get rid of the alternative_input() macro; its callers are converted to
a more general-purpose version of alternative_io().

- Also get rid of alternative_input_2() since it's not used anywhere.
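
With the new interface, the atomic64_add_return() call site shown above
becomes:

  alternative_atomic64(add_return,
                       OUTPUTS("+A" (i), "+c" (v)),
                       INPUTS(),
                       CLOBBERS("memory"));

Empty OUTPUTS() and INPUTS() groups are allowed, and the clobbers
argument can be omitted entirely: the CLOBBERS_APPEND() helper added to
include/linux/compiler.h only emits the clobber section when clobbers
are actually passed.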

Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
---
arch/x86/include/asm/alternative.h | 74 ++++++++++---------------
arch/x86/include/asm/apic.h | 3 +-
arch/x86/include/asm/atomic64_32.h | 101 ++++++++++++++++++++++-------------
arch/x86/include/asm/cmpxchg_32.h | 20 +++----
arch/x86/include/asm/page_64.h | 5 +-
arch/x86/include/asm/percpu.h | 8 +--
arch/x86/include/asm/processor.h | 14 ++---
arch/x86/include/asm/special_insns.h | 3 +-
arch/x86/include/asm/uaccess_64.h | 8 +--
include/linux/compiler.h | 29 ++++++++++
10 files changed, 155 insertions(+), 110 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 1b020381ab38..53f18258c86f 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -171,43 +171,37 @@ static inline int alternatives_text_reserved(void *start, void *end)
asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")

/*
- * Alternative inline assembly with input.
+ * Alternative inline assembly with user-specified constraints. Use the
+ * OUTPUTS(), INPUTS(), and CLOBBERS() macros to combine the arguments as
+ * needed.
*
* Pecularities:
* No memory clobber here.
- * Argument numbers start with 1.
* Best is to use constraints that are fixed size (like (%1) ... "r")
* If you use variable sized constraints like "m" or "g" in the
* replacement make sure to pad to the worst case length.
* Leaving an unused argument 0 to keep API compatibility.
*/
-#define alternative_input(oldinstr, newinstr, feature, input...) \
+#define alternative_io(oldinstr, newinstr, feature, outputs, inputs, \
+ clobbers...) \
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
- : : "i" (0), ## input)
+ : outputs \
+ : inputs \
+ CLOBBERS_APPEND(clobbers))

/*
- * This is similar to alternative_input. But it has two features and
- * respective instructions.
+ * Like alternative_io, but for replacing a direct call with another one.
*
- * If CPU has feature2, newinstr2 is used.
- * Otherwise, if CPU has feature1, newinstr1 is used.
- * Otherwise, oldinstr is used.
+ * Positional operand names ("%0") and constraints ("0" (foo)) are not allowed.
*/
-#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \
- feature2, input...) \
- asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \
- newinstr2, feature2) \
- : : "i" (0), ## input)
-
-/* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, newinstr, feature, output, input...) \
- asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
- : output : "i" (0), ## input)
-
-/* Like alternative_io, but for replacing a direct call with another one. */
-#define alternative_call(oldfunc, newfunc, feature, output, input...) \
- asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
- : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
+#define alternative_call(oldfunc, newfunc, feature, outputs, inputs, \
+ clobbers...) \
+ asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", \
+ feature) \
+ : outputs \
+ : [old] "i" (oldfunc), [new] "i" (newfunc) \
+ ARGS_APPEND(inputs) \
+ CLOBBERS_APPEND(clobbers))

/*
* Like alternative_call, but there are two features and respective functions.
@@ -215,29 +209,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Otherwise, if CPU has feature1, function1 is used.
* Otherwise, old function is used.
*/
-#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
- output, input...) \
-{ \
- register void *__sp asm(_ASM_SP); \
- asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
- "call %P[new2]", feature2) \
- : output, "+r" (__sp) \
- : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
- [new2] "i" (newfunc2), ## input); \
+#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, \
+ feature2, outputs, inputs, clobbers...) \
+{ \
+ register void *__sp asm(_ASM_SP); \
+ asm volatile (ALTERNATIVE_2("call %P[old]", \
+ "call %P[new1]", feature1, \
+ "call %P[new2]", feature2) \
+ : "+r" (__sp) ARGS_APPEND(outputs) \
+ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
+ [new2] "i" (newfunc2) ARGS_APPEND(inputs) \
+ CLOBBERS_APPEND(clobbers)); \
}

-/*
- * use this macro(s) if you need more than one output parameter
- * in alternative_io
- */
-#define ASM_OUTPUT2(a...) a
-
-/*
- * use this macro if you need clobbers but no inputs in
- * alternative_{input,io,call}()
- */
-#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
-
#endif /* __ASSEMBLY__ */

#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 5a7e0eb38350..77d743d10b21 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -95,7 +95,8 @@ static inline void native_apic_mem_write(u32 reg, u32 v)

alternative_io("movl %[val], %P[reg]", "xchgl %[val], %P[reg]",
X86_BUG_11AP,
- ASM_OUTPUT2([val] "+r" (v), [reg] "+m" (*addr)));
+ OUTPUTS([val] "+r" (v), [reg] "+m" (*addr)),
+ INPUTS());
}

static inline u32 native_apic_mem_read(u32 reg)
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 9e206f31ce2a..7ab0efe8a13d 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -22,15 +22,19 @@ typedef struct {
#endif

#ifdef CONFIG_X86_CMPXCHG64
-#define __alternative_atomic64(f, g, out, in...) \
- asm volatile("call %P[func]" \
- : out : [func] "i" (atomic64_##g##_cx8), ## in)
+#define __alternative_atomic64(f, g, outputs, inputs, clobbers...) \
+ asm volatile("call %P[func]" \
+ : outputs \
+ : [func] "i" (atomic64_##g##_cx8) \
+ ARGS_APPEND(inputs) \
+ CLOBBERS_APPEND(clobbers))

#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
#else
-#define __alternative_atomic64(f, g, out, in...) \
- alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
- X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in)
+#define __alternative_atomic64(f, g, outputs, inputs, clobbers...) \
+ alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
+ X86_FEATURE_CX8, \
+ OUTPUTS(outputs), INPUTS(inputs), clobbers)

#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \
ATOMIC64_DECL_ONE(sym##_386)
@@ -41,8 +45,9 @@ ATOMIC64_DECL_ONE(inc_386);
ATOMIC64_DECL_ONE(dec_386);
#endif

-#define alternative_atomic64(f, out, in...) \
- __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in)
+#define alternative_atomic64(f, outputs, inputs, clobbers...) \
+ __alternative_atomic64(f, f, OUTPUTS(outputs), INPUTS(inputs), \
+ clobbers)

ATOMIC64_DECL(read);
ATOMIC64_DECL(set);
@@ -88,9 +93,10 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n)
long long o;
unsigned high = (unsigned)(n >> 32);
unsigned low = (unsigned)n;
- alternative_atomic64(xchg, "=&A" (o),
- "S" (v), "b" (low), "c" (high)
- : "memory");
+ alternative_atomic64(xchg,
+ OUTPUTS("=&A" (o)),
+ INPUTS("S" (v), "b" (low), "c" (high)),
+ CLOBBERS("memory"));
return o;
}

@@ -105,9 +111,10 @@ static inline void atomic64_set(atomic64_t *v, long long i)
{
unsigned high = (unsigned)(i >> 32);
unsigned low = (unsigned)i;
- alternative_atomic64(set, /* no output */,
- "S" (v), "b" (low), "c" (high)
- : "eax", "edx", "memory");
+ alternative_atomic64(set,
+ OUTPUTS(),
+ INPUTS("S" (v), "b" (low), "c" (high)),
+ CLOBBERS("eax", "edx", "memory"));
}

/**
@@ -119,7 +126,10 @@ static inline void atomic64_set(atomic64_t *v, long long i)
static inline long long atomic64_read(const atomic64_t *v)
{
long long r;
- alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
+ alternative_atomic64(read,
+ OUTPUTS("=&A" (r)),
+ INPUTS("c" (v)),
+ CLOBBERS("memory"));
return r;
}

@@ -133,8 +143,9 @@ static inline long long atomic64_read(const atomic64_t *v)
static inline long long atomic64_add_return(long long i, atomic64_t *v)
{
alternative_atomic64(add_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
+ OUTPUTS("+A" (i), "+c" (v)),
+ INPUTS(),
+ CLOBBERS("memory"));
return i;
}

@@ -144,24 +155,29 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
static inline long long atomic64_sub_return(long long i, atomic64_t *v)
{
alternative_atomic64(sub_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
+ OUTPUTS("+A" (i), "+c" (v)),
+ INPUTS(),
+ CLOBBERS("memory"));
return i;
}

static inline long long atomic64_inc_return(atomic64_t *v)
{
long long a;
- alternative_atomic64(inc_return, "=&A" (a),
- "S" (v) : "memory", "ecx");
+ alternative_atomic64(inc_return,
+ OUTPUTS("=&A" (a)),
+ INPUTS("S" (v)),
+ CLOBBERS("memory", "ecx"));
return a;
}

static inline long long atomic64_dec_return(atomic64_t *v)
{
long long a;
- alternative_atomic64(dec_return, "=&A" (a),
- "S" (v) : "memory", "ecx");
+ alternative_atomic64(dec_return,
+ OUTPUTS("=&A" (a)),
+ INPUTS("S" (v)),
+ CLOBBERS("memory", "ecx"));
return a;
}

@@ -175,8 +191,9 @@ static inline long long atomic64_dec_return(atomic64_t *v)
static inline long long atomic64_add(long long i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
+ OUTPUTS("+A" (i), "+c" (v)),
+ INPUTS(),
+ CLOBBERS("memory"));
return i;
}

@@ -190,8 +207,9 @@ static inline long long atomic64_add(long long i, atomic64_t *v)
static inline long long atomic64_sub(long long i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
+ OUTPUTS("+A" (i), "+c" (v)),
+ INPUTS(),
+ CLOBBERS("memory"));
return i;
}

@@ -217,8 +235,10 @@ static inline int atomic64_sub_and_test(long long i, atomic64_t *v)
*/
static inline void atomic64_inc(atomic64_t *v)
{
- __alternative_atomic64(inc, inc_return, /* no output */,
- "S" (v) : "memory", "eax", "ecx", "edx");
+ __alternative_atomic64(inc, inc_return,
+ OUTPUTS(),
+ INPUTS("S" (v)),
+ CLOBBERS("memory", "eax", "ecx", "edx"));
}

/**
@@ -229,8 +249,10 @@ static inline void atomic64_inc(atomic64_t *v)
*/
static inline void atomic64_dec(atomic64_t *v)
{
- __alternative_atomic64(dec, dec_return, /* no output */,
- "S" (v) : "memory", "eax", "ecx", "edx");
+ __alternative_atomic64(dec, dec_return,
+ OUTPUTS(),
+ INPUTS("S" (v)),
+ CLOBBERS("memory", "eax", "ecx", "edx"));
}

/**
@@ -287,8 +309,9 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
unsigned low = (unsigned)u;
unsigned high = (unsigned)(u >> 32);
alternative_atomic64(add_unless,
- ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)),
- "S" (v) : "memory");
+ OUTPUTS("+A" (a), "+c" (low), "+D" (high)),
+ INPUTS("S" (v)),
+ CLOBBERS("memory"));
return (int)a;
}

@@ -296,16 +319,20 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
static inline int atomic64_inc_not_zero(atomic64_t *v)
{
int r;
- alternative_atomic64(inc_not_zero, "=&a" (r),
- "S" (v) : "ecx", "edx", "memory");
+ alternative_atomic64(inc_not_zero,
+ OUTPUTS("=&a" (r)),
+ INPUTS("S" (v)),
+ CLOBBERS("ecx", "edx", "memory"));
return r;
}

static inline long long atomic64_dec_if_positive(atomic64_t *v)
{
long long r;
- alternative_atomic64(dec_if_positive, "=&A" (r),
- "S" (v) : "ecx", "memory");
+ alternative_atomic64(dec_if_positive,
+ OUTPUTS("=&A" (r)),
+ INPUTS("S" (v)),
+ CLOBBERS("ecx", "memory"));
return r;
}

diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 8154a317899f..9d0db650f73a 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -84,11 +84,11 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
"call cmpxchg8b_emu", \
"lock; cmpxchg8b (%%esi)" , \
X86_FEATURE_CX8, \
- "=A" (__ret), \
- "S" ((ptr)), "A" (__old), \
- "b" ((unsigned int)__new), \
- "c" ((unsigned int)(__new>>32)) \
- : "memory"); \
+ OUTPUTS("=A" (__ret)), \
+ INPUTS("S" ((ptr)), "A" (__old), \
+ "b" ((unsigned int)__new), \
+ "c" ((unsigned int)(__new>>32))), \
+ CLOBBERS("memory")); \
__ret; })


@@ -100,11 +100,11 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
alternative_io("call cmpxchg8b_emu", \
"cmpxchg8b (%%esi)" , \
X86_FEATURE_CX8, \
- "=A" (__ret), \
- "S" ((ptr)), "A" (__old), \
- "b" ((unsigned int)__new), \
- "c" ((unsigned int)(__new>>32)) \
- : "memory"); \
+ OUTPUTS("=A" (__ret)), \
+ INPUTS("S" ((ptr)), "A" (__old), \
+ "b" ((unsigned int)__new), \
+ "c" ((unsigned int)(__new>>32))), \
+ CLOBBERS("memory")); \
__ret; })

#endif
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index f7dbe752f80d..c6bf768f7708 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -44,8 +44,9 @@ static inline void clear_page(void *page)
alternative_call_2(clear_page_orig,
clear_page_rep, X86_FEATURE_REP_GOOD,
clear_page_erms, X86_FEATURE_ERMS,
- "+D" (page),
- ASM_NO_INPUT_CLOBBER("memory", "rax", "rcx"));
+ OUTPUTS("+D" (page)),
+ INPUTS(),
+ CLOBBERS("memory", "rax", "rcx"));
}

void copy_page(void *to, void *from);
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 2d1753758b0b..dc421c754e69 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -498,10 +498,10 @@ do { \
"cmpxchg16b " __percpu_arg([p1]) "\n\t" \
"setz %[ret]\n\t", \
X86_FEATURE_CX16, \
- ASM_OUTPUT2([ret] "=a" (__ret), \
- [p1] "+m" (pcp1), "+m" (pcp2), \
- "+d" (__o2)), \
- "b" (__n1), "c" (__n2), "a" (__o1) : "rsi"); \
+ OUTPUTS([ret] "=a" (__ret), [p1] "+m" (pcp1), \
+ "+m" (pcp2), "+d" (__o2)), \
+ INPUTS("b" (__n1), "c" (__n2), "a" (__o1)), \
+ CLOBBERS("rsi")); \
__ret; \
})

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 51cf1c7e9aca..44d7c6f033c9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -780,9 +780,10 @@ extern char ignore_fpu_irq;
*/
static inline void prefetch(const void *x)
{
- alternative_input(BASE_PREFETCH, "prefetchnta %P[x]",
- X86_FEATURE_XMM,
- [x] "m" (*(const char *)x));
+ alternative_io(BASE_PREFETCH, "prefetchnta %P[x]",
+ X86_FEATURE_XMM,
+ OUTPUTS(),
+ INPUTS([x] "m" (*(const char *)x)));
}

/*
@@ -792,9 +793,10 @@ static inline void prefetch(const void *x)
*/
static inline void prefetchw(const void *x)
{
- alternative_input(BASE_PREFETCH, "prefetchw %P[x]",
- X86_FEATURE_3DNOWPREFETCH,
- [x] "m" (*(const char *)x));
+ alternative_io(BASE_PREFETCH, "prefetchw %P[x]",
+ X86_FEATURE_3DNOWPREFETCH,
+ OUTPUTS(),
+ INPUTS([x] "m" (*(const char *)x)));
}

static inline void spin_lock_prefetch(const void *x)
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index aeee6517ccc6..fa2caa1e77a5 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -219,7 +219,8 @@ static inline void clflushopt(volatile void *__p)
alternative_io(".byte " __stringify(NOP_DS_PREFIX) "; clflush %P[p]",
".byte 0x66; clflush %P[p]",
X86_FEATURE_CLFLUSHOPT,
- [p] "+m" (*(volatile char __force *)__p));
+ OUTPUTS([p] "+m" (*(volatile char __force *)__p)),
+ INPUTS());
}

static inline void clwb(volatile void *__p)
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index e09f71424795..9afd40681e43 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -38,10 +38,10 @@ copy_user_generic(void *to, const void *from, unsigned len)
X86_FEATURE_REP_GOOD,
copy_user_enhanced_fast_string,
X86_FEATURE_ERMS,
- ASM_OUTPUT2("=a" (ret), "+D" (to), "+S" (from),
- "+d" (len)),
- ASM_NO_INPUT_CLOBBER("memory", "rcx", "r8", "r9",
- "r10", "r11"));
+ OUTPUTS("=a" (ret), "+D" (to), "+S" (from),
+ "+d" (len)),
+ INPUTS(),
+ CLOBBERS("memory", "rcx", "r8", "r9", "r10", "r11"));
return ret;
}

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index dfaaeec4a32e..c738966434c1 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -620,4 +620,33 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
(_________p1); \
})

+#define __ARG17(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, \
+ _13, _14, _15, _16, _17, ...) _17
+
+/* Return 1 if the macro has arguments, 0 otherwise. */
+#define HAS_ARGS(...) __ARG17(0, ## __VA_ARGS__, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
+
+/* Macros for passing inline asm constraints as macro arguments */
+#define ARGS(args...) args
+#define OUTPUTS ARGS
+#define INPUTS ARGS
+#define CLOBBERS ARGS
+
+/* If the macro has arguments, prepend them with a comma. */
+#define ARGS_APPEND(...) , ## __VA_ARGS__
+
+#define __CLOBBERS_APPEND_0(...)
+#define __CLOBBERS_APPEND_1(...) : __VA_ARGS__
+
+#define __CLOBBERS_APPEND(has_args, ...) \
+ __CLOBBERS_APPEND_ ## has_args (__VA_ARGS__)
+
+#define _CLOBBERS_APPEND(has_args, ...) \
+ __CLOBBERS_APPEND(has_args, __VA_ARGS__)
+
+/* If the macro has arguments, prepend them with a colon. */
+#define CLOBBERS_APPEND(...) \
+ _CLOBBERS_APPEND(HAS_ARGS(__VA_ARGS__), __VA_ARGS__)
+
#endif /* __LINUX_COMPILER_H */
--
2.13.5