[RFC PATCH 4/4] x86/asm: Use ASM_CALL() macro for inline asm statements with call instructions

From: Josh Poimboeuf
Date: Thu Aug 31 2017 - 10:13:54 EST


Inline asm statements which have call instructions can be problematic.
GCC doesn't know about the call instructions, so in some cases it can
insert the asm before setting up the frame pointer. This can result in
bad stack traces when unwinding from the called function.
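
As a contrived illustration, with a bare statement like:

    asm volatile("call func");

GCC is free to emit the asm before the frame pointer setup (on x86_64,
"push %rbp; mov %rsp, %rbp"), in which case an unwind from func() walks
a half-constructed frame. ("func" is just an illustrative stand-in.)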

Previously we worked around this issue by listing the stack pointer as
an input/output constraint for the inline asm. That works for GCC, but
unfortunately it doesn't work for Clang. In fact, it causes Clang to
corrupt the stack pointer.
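
Concretely, the workaround looked like this (taken from the
__preempt_schedule() definition removed below):

    register void *__sp asm(_ASM_SP);
    asm volatile ("call ___preempt_schedule" : "+r"(__sp));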

Introduce a new ASM_CALL() macro, which should be used for all inline asm
statements which have call instructions. On GCC with frame pointers, it
sets the stack pointer as an input/output constraint, like before. On
GCC without frame pointers, it does nothing, which saves a small amount
of text. On Clang, it does nothing (for now).
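
With the macro, the __preempt_schedule() example above becomes simply:

    ASM_CALL("call ___preempt_schedule", OUTPUTS(), INPUTS());

where OUTPUTS()/INPUTS()/CLOBBERS() wrap the usual constraint lists.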

Reported-by: Matthias Kaehlcke <mka@xxxxxxxxxxxx>
Signed-off-by: Josh Poimboeuf <jpoimboe@xxxxxxxxxx>
---
 arch/x86/include/asm/alternative.h               |  28 +++---
 arch/x86/include/asm/arch_hweight.h              |  14 +--
 arch/x86/include/asm/atomic64_32.h               |  10 +-
 arch/x86/include/asm/mshyperv.h                  |  52 +++++-----
 arch/x86/include/asm/paravirt_types.h            |  54 +++++------
 arch/x86/include/asm/preempt.h                   |  15 +--
 arch/x86/include/asm/processor.h                 |  17 ++--
 arch/x86/include/asm/rwsem.h                     | 115 +++++++++++------------
 arch/x86/include/asm/uaccess.h                   |  24 ++---
 arch/x86/include/asm/xen/hypercall.h             |  59 ++++++------
 arch/x86/kvm/emulate.c                           |   9 +-
 arch/x86/kvm/vmx.c                               |  17 ++--
 arch/x86/mm/fault.c                              |  13 ++-
 include/linux/compiler-clang.h                   |   2 +
 include/linux/compiler-gcc.h                     |  19 ++++
 include/linux/compiler.h                         |   5 +
 tools/objtool/Documentation/stack-validation.txt |  19 ++--
 17 files changed, 237 insertions(+), 235 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 53f18258c86f..d10179652999 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -196,12 +196,11 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define alternative_call(oldfunc, newfunc, feature, outputs, inputs, \
clobbers...) \
- asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", \
- feature) \
- : outputs \
- : [old] "i" (oldfunc), [new] "i" (newfunc) \
- ARGS_APPEND(inputs) \
- CLOBBERS_APPEND(clobbers))
+ ASM_CALL(ALTERNATIVE("call %P[old]", "call %P[new]", feature), \
+ OUTPUTS(outputs), \
+ INPUTS([old] "i" (oldfunc), [new] "i" (newfunc) \
+ ARGS_APPEND(inputs)), \
+ clobbers)

/*
* Like alternative_call, but there are two features and respective functions.
@@ -211,16 +210,13 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, \
feature2, outputs, inputs, clobbers...) \
-{ \
- register void *__sp asm(_ASM_SP); \
- asm volatile (ALTERNATIVE_2("call %P[old]", \
- "call %P[new1]", feature1, \
- "call %P[new2]", feature2) \
- : "+r" (__sp) ARGS_APPEND(outputs) \
- : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
- [new2] "i" (newfunc2) ARGS_APPEND(inputs) \
- CLOBBERS_APPEND(clobbers)); \
-}
+ ASM_CALL(ALTERNATIVE_2("call %P[old]", \
+ "call %P[new1]", feature1, \
+ "call %P[new2]", feature2), \
+ OUTPUTS(outputs), \
+ INPUTS([old] "i" (oldfunc), [new1] "i" (newfunc1), \
+ [new2] "i" (newfunc2) ARGS_APPEND(inputs)), \
+ clobbers)

#endif /* __ASSEMBLY__ */

diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index e7cd63175de4..3da74407eca0 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -23,9 +23,10 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;

- asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ ASM_CALL(ALTERNATIVE("call __sw_hweight32", POPCNT32,
+ X86_FEATURE_POPCNT),
+ OUTPUTS("="REG_OUT (res)),
+ INPUTS(REG_IN (w)));

return res;
}
@@ -51,9 +52,10 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
{
unsigned long res;

- asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ ASM_CALL(ALTERNATIVE("call __sw_hweight64", POPCNT64,
+ X86_FEATURE_POPCNT),
+ OUTPUTS("="REG_OUT (res)),
+ INPUTS(REG_IN (w)));

return res;
}
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 7ab0efe8a13d..cb74b47ea71b 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -23,11 +23,11 @@ typedef struct {

#ifdef CONFIG_X86_CMPXCHG64
#define __alternative_atomic64(f, g, outputs, inputs, clobbers...) \
- asm volatile("call %P[func]" \
- : outputs \
- : [func] "i" (atomic64_##g##_cx8) \
- ARGS_APPEND(inputs) \
- CLOBBERS_APPEND(clobbers))
+ ASM_CALL("call %P[func]", \
+ OUTPUTS(outputs), \
+ INPUTS([func] "i" (atomic64_##g##_cx8) \
+ ARGS_APPEND(inputs)), \
+ clobbers)

#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
#else
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index d0675d58fa32..d46b384ec987 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -177,19 +177,17 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
u64 input_address = input ? virt_to_phys(input) : 0;
u64 output_address = output ? virt_to_phys(output) : 0;
u64 hv_status;
- register void *__sp asm(_ASM_SP);

#ifdef CONFIG_X86_64
if (!hv_hypercall_pg)
return U64_MAX;

- __asm__ __volatile__("mov %[out], %%r8\n"
- "call *%[pg]"
- : "=a" (hv_status), "+r" (__sp),
- "+c" (control), "+d" (input_address)
- : [out] "r" (output_address),
- [pg] "m" (hv_hypercall_pg)
- : "cc", "memory", "r8", "r9", "r10", "r11");
+ ASM_CALL("mov %[out], %%r8\n"
+ "call *%[pg]",
+ OUTPUTS("=a" (hv_status), "+c" (control),
+ "+d" (input_address)),
+ INPUTS([out] "r" (output_address), [pg] "m" (hv_hypercall_pg)),
+ CLOBBERS("cc", "memory", "r8", "r9", "r10", "r11"));
#else
u32 input_address_hi = upper_32_bits(input_address);
u32 input_address_lo = lower_32_bits(input_address);
@@ -199,14 +197,12 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
if (!hv_hypercall_pg)
return U64_MAX;

- __asm__ __volatile__("call *%[pg]"
- : "=A" (hv_status),
- "+c" (input_address_lo), "+r" (__sp)
- : "A" (control),
- "b" (input_address_hi),
- "D"(output_address_hi), "S"(output_address_lo),
- [pg] "m" (hv_hypercall_pg)
- : "cc", "memory");
+ ASM_CALL("call *%[pg]",
+ OUTPUTS("=A" (hv_status), "+c" (input_address_lo)),
+ INPUTS("A" (control), "b" (input_address_hi),
+ "D" (output_address_hi), "S" (output_address_lo),
+ [pg] "m" (hv_hypercall_pg)),
+ CLOBBERS("cc", "memory"));
#endif /* !x86_64 */
return hv_status;
}
@@ -223,29 +219,25 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
{
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
- register void *__sp asm(_ASM_SP);

#ifdef CONFIG_X86_64
{
- __asm__ __volatile__("call *%[pg]"
- : "=a" (hv_status), "+r" (__sp),
- "+c" (control), "+d" (input1)
- : [pg] "m" (hv_hypercall_pg)
- : "cc", "r8", "r9", "r10", "r11");
+ ASM_CALL("call *%[pg]",
+ OUTPUTS("=a" (hv_status), "+c" (control),
+ "+d" (input1)),
+ INPUTS([pg] "m" (hv_hypercall_pg)),
+ CLOBBERS("cc", "r8", "r9", "r10", "r11"));
}
#else
{
u32 input1_hi = upper_32_bits(input1);
u32 input1_lo = lower_32_bits(input1);

- __asm__ __volatile__ ("call *%[pg]"
- : "=A"(hv_status),
- "+c"(input1_lo),
- "+r"(__sp)
- : "A" (control),
- "b" (input1_hi),
- [pg] "m" (hv_hypercall_pg)
- : "cc", "edi", "esi");
+ ASM_CALL("call *%[pg]",
+ OUTPUTS("=A" (hv_status), "+c" (input1_lo)),
+ INPUTS("A" (control), "b" (input1_hi),
+ [pg] "m" (hv_hypercall_pg)),
+ CLOBBERS("cc", "edi", "esi"));
}
#endif
return hv_status;
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index a509259a3181..e97143fbc4c0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -471,8 +471,7 @@ int paravirt_disable_iospace(void);
*/
#ifdef CONFIG_X86_32
#define PVOP_VCALL_ARGS \
- unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; \
- register void *__sp asm("esp")
+ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS

#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
@@ -492,8 +491,7 @@ int paravirt_disable_iospace(void);
/* [re]ax isn't an arg, but the return val */
#define PVOP_VCALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
- __edx = __edx, __ecx = __ecx, __eax = __eax; \
- register void *__sp asm("rsp")
+ __edx = __edx, __ecx = __ecx, __eax = __eax;
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS

#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
@@ -541,24 +539,24 @@ int paravirt_disable_iospace(void);
/* This is 32-bit specific, but is okay in 64-bit */ \
/* since this condition will never hold */ \
if (sizeof(rettype) > sizeof(unsigned long)) { \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
- : outputs, "+r" (__sp) \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
- ##__VA_ARGS__ \
- : "memory", "cc" extra_clbr); \
+ ASM_CALL(pre \
+ paravirt_alt(PARAVIRT_CALL) \
+ post, \
+ OUTPUTS(outputs), \
+ INPUTS(paravirt_type(op), \
+ paravirt_clobber(clbr), \
+ ##__VA_ARGS__), \
+ CLOBBERS("memory", "cc" extra_clbr)); \
__ret = (rettype)((((u64)__edx) << 32) | __eax); \
} else { \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
- : outputs, "+r" (__sp) \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
- ##__VA_ARGS__ \
- : "memory", "cc" extra_clbr); \
+ ASM_CALL(pre \
+ paravirt_alt(PARAVIRT_CALL) \
+ post, \
+ OUTPUTS(outputs), \
+ INPUTS(paravirt_type(op), \
+ paravirt_clobber(clbr), \
+ ##__VA_ARGS__), \
+ CLOBBERS("memory", "cc" extra_clbr)); \
__ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \
} \
__ret; \
@@ -578,14 +576,14 @@ int paravirt_disable_iospace(void);
({ \
PVOP_VCALL_ARGS; \
PVOP_TEST_NULL(op); \
- asm volatile(pre \
- paravirt_alt(PARAVIRT_CALL) \
- post \
- : outputs, "+r" (__sp) \
- : paravirt_type(op), \
- paravirt_clobber(clbr), \
- ##__VA_ARGS__ \
- : "memory", "cc" extra_clbr); \
+ ASM_CALL(pre \
+ paravirt_alt(PARAVIRT_CALL) \
+ post, \
+ OUTPUTS(outputs), \
+ INPUTS(paravirt_type(op), \
+ paravirt_clobber(clbr), \
+ ##__VA_ARGS__), \
+ CLOBBERS("memory", "cc" extra_clbr)); \
})

#define __PVOP_VCALL(op, pre, post, ...) \
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index ec1f3c651150..0d9316a73af0 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -100,19 +100,14 @@ static __always_inline bool should_resched(int preempt_offset)

#ifdef CONFIG_PREEMPT
extern asmlinkage void ___preempt_schedule(void);
-# define __preempt_schedule() \
-({ \
- register void *__sp asm(_ASM_SP); \
- asm volatile ("call ___preempt_schedule" : "+r"(__sp)); \
-})
+# define __preempt_schedule() \
+ ASM_CALL("call ___preempt_schedule", OUTPUTS(), INPUTS())

extern asmlinkage void preempt_schedule(void);
extern asmlinkage void ___preempt_schedule_notrace(void);
-# define __preempt_schedule_notrace() \
-({ \
- register void *__sp asm(_ASM_SP); \
- asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp)); \
-})
+# define __preempt_schedule_notrace() \
+ ASM_CALL("call ___preempt_schedule_notrace", OUTPUTS(), INPUTS())
+
extern asmlinkage void preempt_schedule_notrace(void);
#endif

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 44d7c6f033c9..8f5cd252be53 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -677,20 +677,21 @@ static inline void sync_core(void)
* Like all of Linux's memory ordering operations, this is a
* compiler barrier as well.
*/
- register void *__sp asm(_ASM_SP);

#ifdef CONFIG_X86_32
- asm volatile (
+ ASM_CALL(
"pushfl\n\t"
"pushl %%cs\n\t"
"pushl $1f\n\t"
"iret\n\t"
- "1:"
- : "+r" (__sp) : : "memory");
+ "1:",
+ OUTPUTS(),
+ INPUTS(),
+ CLOBBERS("memory"));
#else
unsigned int tmp;

- asm volatile (
+ ASM_CALL(
UNWIND_HINT_SAVE
"mov %%ss, %[tmp]\n\t"
"pushq %q[tmp]\n\t"
@@ -702,8 +703,10 @@ static inline void sync_core(void)
"pushq $1f\n\t"
"iretq\n\t"
UNWIND_HINT_RESTORE
- "1:"
- : [tmp] "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
+ "1:",
+ OUTPUTS([tmp] "=&r" (tmp)),
+ INPUTS(),
+ CLOBBERS("cc", "memory"));
#endif
}

diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index b715152fb2b5..72253028b740 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -62,16 +62,16 @@
*/
static inline void __down_read(struct rw_semaphore *sem)
{
- asm volatile("# beginning down_read\n\t"
- LOCK_PREFIX _ASM_INC "(%[sem])\n\t"
- /* adds 0x00000001 */
- " jns 1f\n\t"
- " call call_rwsem_down_read_failed\n\t"
- "1:\n\t"
- "# ending down_read\n\t"
- : "+m" (sem->count)
- : [sem] "a" (sem)
- : "memory", "cc");
+ ASM_CALL("# beginning down_read\n\t"
+ LOCK_PREFIX _ASM_INC "(%[sem])\n\t"
+ /* adds 0x00000001 */
+ " jns 1f\n\t"
+ " call call_rwsem_down_read_failed\n\t"
+ "1:\n\t"
+ "# ending down_read\n\t",
+ OUTPUTS("+m" (sem->count)),
+ INPUTS([sem] "a" (sem)),
+ CLOBBERS("memory", "cc"));
}

/*
@@ -104,22 +104,21 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
({ \
long tmp; \
struct rw_semaphore* ret; \
- register void *__sp asm(_ASM_SP); \
\
- asm volatile("# beginning down_write\n\t" \
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \
- /* adds 0xffff0001, returns the old value */ \
- " test " __ASM_SEL_RAW(%w,%k) "[tmp]," \
- __ASM_SEL_RAW(%w,%k) "[tmp]\n\t" \
- /* was the active mask 0 before? */ \
- " jz 1f\n" \
- " call " slow_path "\n\t" \
- "1:\n\t" \
- "# ending down_write\n\t" \
- : "+m" (sem->count), [tmp] "=d" (tmp), "=a" (ret), \
- "+r" (__sp) \
- : [sem] "a" (sem), "d" (RWSEM_ACTIVE_WRITE_BIAS) \
- : "memory", "cc"); \
+ ASM_CALL("# beginning down_write\n\t" \
+ LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \
+ /* adds 0xffff0001, returns the old value */ \
+ " test " __ASM_SEL_RAW(%w,%k) "[tmp]," \
+ __ASM_SEL_RAW(%w,%k) "[tmp]\n\t" \
+ /* was the active mask 0 before? */ \
+ " jz 1f\n" \
+ " call " slow_path "\n\t" \
+ "1:\n\t" \
+ "# ending down_write\n\t", \
+ OUTPUTS("+m" (sem->count), [tmp] "=d" (tmp), \
+ "=a" (ret)), \
+ INPUTS([sem] "a" (sem), "d" (RWSEM_ACTIVE_WRITE_BIAS)),\
+ CLOBBERS("memory", "cc")); \
ret; \
})

@@ -170,16 +169,16 @@ static inline bool __down_write_trylock(struct rw_semaphore *sem)
static inline void __up_read(struct rw_semaphore *sem)
{
long tmp;
- asm volatile("# beginning __up_read\n\t"
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
- /* subtracts 1, returns the old value */
- " jns 1f\n\t"
- " call call_rwsem_wake\n" /* expects old value in %edx */
- "1:\n"
- "# ending __up_read\n"
- : "+m" (sem->count), [tmp] "=d" (tmp)
- : [sem] "a" (sem), "d" (-RWSEM_ACTIVE_READ_BIAS)
- : "memory", "cc");
+ ASM_CALL("# beginning __up_read\n\t"
+ LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
+ /* subtracts 1, returns the old value */
+ " jns 1f\n\t"
+ " call call_rwsem_wake\n\t" /* expects old value in %edx */
+ "1:\n\t"
+ "# ending __up_read\n\t",
+ OUTPUTS("+m" (sem->count), [tmp] "=d" (tmp)),
+ INPUTS([sem] "a" (sem), "d" (-RWSEM_ACTIVE_READ_BIAS)),
+ CLOBBERS("memory", "cc"));
}

/*
@@ -188,16 +187,16 @@ static inline void __up_read(struct rw_semaphore *sem)
static inline void __up_write(struct rw_semaphore *sem)
{
long tmp;
- asm volatile("# beginning __up_write\n\t"
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
- /* subtracts 0xffff0001, returns the old value */
- " jns 1f\n\t"
- " call call_rwsem_wake\n" /* expects old value in %edx */
- "1:\n\t"
- "# ending __up_write\n"
- : "+m" (sem->count), [tmp] "=d" (tmp)
- : [sem] "a" (sem), "d" (-RWSEM_ACTIVE_WRITE_BIAS)
- : "memory", "cc");
+ ASM_CALL("# beginning __up_write\n\t"
+ LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
+ /* subtracts 0xffff0001, returns the old value */
+ " jns 1f\n\t"
+ " call call_rwsem_wake\n\t" /* expects old value in %edx */
+ "1:\n\t"
+ "# ending __up_write\n\t",
+ OUTPUTS("+m" (sem->count), [tmp] "=d" (tmp)),
+ INPUTS([sem] "a" (sem), "d" (-RWSEM_ACTIVE_WRITE_BIAS)),
+ CLOBBERS("memory", "cc"));
}

/*
@@ -205,19 +204,19 @@ static inline void __up_write(struct rw_semaphore *sem)
*/
static inline void __downgrade_write(struct rw_semaphore *sem)
{
- asm volatile("# beginning __downgrade_write\n\t"
- LOCK_PREFIX _ASM_ADD "%[bias],(%[sem])\n\t"
- /*
- * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
- * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
- */
- " jns 1f\n\t"
- " call call_rwsem_downgrade_wake\n"
- "1:\n\t"
- "# ending __downgrade_write\n"
- : "+m" (sem->count)
- : [sem] "a" (sem), [bias] "er" (-RWSEM_WAITING_BIAS)
- : "memory", "cc");
+ ASM_CALL("# beginning __downgrade_write\n\t"
+ LOCK_PREFIX _ASM_ADD "%[bias],(%[sem])\n\t"
+ /*
+ * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
+ * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
+ */
+ " jns 1f\n\t"
+ " call call_rwsem_downgrade_wake\n\t"
+ "1:\n\t"
+ "# ending __downgrade_write\n\t",
+ OUTPUTS("+m" (sem->count)),
+ INPUTS([sem] "a" (sem), [bias] "er" (-RWSEM_WAITING_BIAS)),
+ CLOBBERS("memory", "cc"));
}

#endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 12fb37310872..1d43478f95bd 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -166,20 +166,20 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
({ \
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
- register void *__sp asm(_ASM_SP); \
__chk_user_ptr(ptr); \
might_fault(); \
- asm volatile("call __get_user_%P[size]" \
- : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp) \
- : "a" (ptr), [size] "i" (sizeof(*(ptr)))); \
+ ASM_CALL("call __get_user_%P[size]", \
+ OUTPUTS("=a" (__ret_gu), "=r" (__val_gu)), \
+ INPUTS("a" (ptr), [size] "i" (sizeof(*(ptr))))); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
__builtin_expect(__ret_gu, 0); \
})

-#define __put_user_x(size, x, ptr, __ret_pu) \
- asm volatile("call __put_user_" #size : "=a" (__ret_pu) \
- : "a" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
-
+#define __put_user_x(size, x, ptr, __ret_pu) \
+ ASM_CALL("call __put_user_" #size, \
+ OUTPUTS("=a" (__ret_pu)), \
+ INPUTS("a" ((typeof(*(ptr)))(x)), "c" (ptr)), \
+ CLOBBERS("ebx"))


#ifdef CONFIG_X86_32
@@ -206,9 +206,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
_ASM_EXTABLE_EX(2b, 3b) \
: : "A" (x), "r" (addr))

-#define __put_user_x8(x, ptr, __ret_pu) \
- asm volatile("call __put_user_8" : "=a" (__ret_pu) \
- : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
+#define __put_user_x8(x, ptr, __ret_pu) \
+ ASM_CALL("call __put_user_8", \
+ OUTPUTS("=a" (__ret_pu)), \
+ INPUTS("A" ((typeof(*(ptr)))(x)), "c" (ptr)), \
+ CLOBBERS("ebx"))
#else
#define __put_user_asm_u64(x, ptr, retval, errret) \
__put_user_asm(x, ptr, retval, "q", "", "er", errret)
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9606688caa4b..7f205139525d 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -114,9 +114,8 @@ extern struct { char _entry[32]; } hypercall_page[];
register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \
- register void *__sp asm(_ASM_SP);

-#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__sp)
+#define __HYPERCALL_0PARAM "=r" (__res)
#define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1)
#define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2)
#define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3)
@@ -146,10 +145,10 @@ extern struct { char _entry[32]; } hypercall_page[];
({ \
__HYPERCALL_DECLS; \
__HYPERCALL_0ARG(); \
- asm volatile (__HYPERCALL \
- : __HYPERCALL_0PARAM \
- : __HYPERCALL_ENTRY(name) \
- : __HYPERCALL_CLOBBER0); \
+ ASM_CALL(__HYPERCALL, \
+ OUTPUTS(__HYPERCALL_0PARAM), \
+ INPUTS(__HYPERCALL_ENTRY(name)), \
+ CLOBBERS(__HYPERCALL_CLOBBER0)); \
(type)__res; \
})

@@ -157,10 +156,10 @@ extern struct { char _entry[32]; } hypercall_page[];
({ \
__HYPERCALL_DECLS; \
__HYPERCALL_1ARG(a1); \
- asm volatile (__HYPERCALL \
- : __HYPERCALL_1PARAM \
- : __HYPERCALL_ENTRY(name) \
- : __HYPERCALL_CLOBBER1); \
+ ASM_CALL(__HYPERCALL, \
+ OUTPUTS(__HYPERCALL_1PARAM), \
+ INPUTS(__HYPERCALL_ENTRY(name)), \
+ CLOBBERS(__HYPERCALL_CLOBBER1)); \
(type)__res; \
})

@@ -168,10 +167,10 @@ extern struct { char _entry[32]; } hypercall_page[];
({ \
__HYPERCALL_DECLS; \
__HYPERCALL_2ARG(a1, a2); \
- asm volatile (__HYPERCALL \
- : __HYPERCALL_2PARAM \
- : __HYPERCALL_ENTRY(name) \
- : __HYPERCALL_CLOBBER2); \
+ ASM_CALL(__HYPERCALL, \
+ OUTPUTS(__HYPERCALL_2PARAM), \
+ INPUTS(__HYPERCALL_ENTRY(name)), \
+ CLOBBERS(__HYPERCALL_CLOBBER2)); \
(type)__res; \
})

@@ -179,10 +178,10 @@ extern struct { char _entry[32]; } hypercall_page[];
({ \
__HYPERCALL_DECLS; \
__HYPERCALL_3ARG(a1, a2, a3); \
- asm volatile (__HYPERCALL \
- : __HYPERCALL_3PARAM \
- : __HYPERCALL_ENTRY(name) \
- : __HYPERCALL_CLOBBER3); \
+ ASM_CALL(__HYPERCALL, \
+ OUTPUTS(__HYPERCALL_3PARAM), \
+ INPUTS(__HYPERCALL_ENTRY(name)), \
+ CLOBBERS(__HYPERCALL_CLOBBER3)); \
(type)__res; \
})

@@ -190,10 +189,10 @@ extern struct { char _entry[32]; } hypercall_page[];
({ \
__HYPERCALL_DECLS; \
__HYPERCALL_4ARG(a1, a2, a3, a4); \
- asm volatile (__HYPERCALL \
- : __HYPERCALL_4PARAM \
- : __HYPERCALL_ENTRY(name) \
- : __HYPERCALL_CLOBBER4); \
+ ASM_CALL(__HYPERCALL, \
+ OUTPUTS(__HYPERCALL_4PARAM), \
+ INPUTS(__HYPERCALL_ENTRY(name)), \
+ CLOBBERS(__HYPERCALL_CLOBBER4)); \
(type)__res; \
})

@@ -201,10 +200,10 @@ extern struct { char _entry[32]; } hypercall_page[];
({ \
__HYPERCALL_DECLS; \
__HYPERCALL_5ARG(a1, a2, a3, a4, a5); \
- asm volatile (__HYPERCALL \
- : __HYPERCALL_5PARAM \
- : __HYPERCALL_ENTRY(name) \
- : __HYPERCALL_CLOBBER5); \
+ ASM_CALL(__HYPERCALL, \
+ OUTPUTS(__HYPERCALL_5PARAM), \
+ INPUTS(__HYPERCALL_ENTRY(name)), \
+ CLOBBERS(__HYPERCALL_CLOBBER5)); \
(type)__res; \
})

@@ -218,10 +217,10 @@ privcmd_call(unsigned call,
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);

stac();
- asm volatile("call *%[call]"
- : __HYPERCALL_5PARAM
- : [call] "a" (&hypercall_page[call])
- : __HYPERCALL_CLOBBER5);
+ ASM_CALL("call *%[call]",
+ OUTPUTS(__HYPERCALL_5PARAM),
+ INPUTS([call] "a" (&hypercall_page[call])),
+ CLOBBERS(__HYPERCALL_CLOBBER5));
clac();

return (long)__res;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index fb0055953fbc..d1ac8b58e5df 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5284,16 +5284,15 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,

static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
- register void *__sp asm(_ASM_SP);
ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;

if (!(ctxt->d & ByteOp))
fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;

- asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
- : "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
- [fastop]"+S"(fop), "+r"(__sp)
- : "c"(ctxt->src2.val));
+ ASM_CALL("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n",
+ OUTPUTS("+a"(ctxt->dst.val), "+d"(ctxt->src.val),
+ [flags]"+D"(flags), [fastop]"+S"(fop)),
+ INPUTS("c"(ctxt->src2.val)));

ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
if (!fop) /* exception is returned in fop variable */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 70b90c0810d0..16f0c782204f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8765,7 +8765,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
{
u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- register void *__sp asm(_ASM_SP);

if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
@@ -8780,7 +8779,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
desc = (gate_desc *)vmx->host_idt_base + vector;
entry = gate_offset(desc);
- asm volatile(
+ ASM_CALL(
#ifdef CONFIG_X86_64
"mov %%" _ASM_SP ", %[sp]\n\t"
"and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
@@ -8789,16 +8788,14 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
#endif
"pushf\n\t"
__ASM_SIZE(push) " $%c[cs]\n\t"
- "call *%[entry]\n\t"
- :
+ "call *%[entry]\n\t",
#ifdef CONFIG_X86_64
- [sp]"=&r"(tmp),
+ [sp]"=&r"(tmp)
#endif
- "+r"(__sp)
- :
- [entry]"r"(entry),
- [ss]"i"(__KERNEL_DS),
- [cs]"i"(__KERNEL_CS)
+ OUTPUTS(),
+ INPUTS([entry] "r" (entry),
+ [ss] "i" (__KERNEL_DS),
+ [cs] "i" (__KERNEL_CS))
);
}
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b836a7274e12..395fb8108744 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -806,7 +806,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
if (is_vmalloc_addr((void *)address) &&
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
- register void *__sp asm("rsp");
unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
/*
* We're likely to be running with very little stack space
@@ -818,13 +817,13 @@ no_context(struct pt_regs *regs, unsigned long error_code,
* and then double-fault, though, because we're likely to
* break the console driver and lose most of the stack dump.
*/
- asm volatile ("movq %[stack], %%rsp\n\t"
- "call handle_stack_overflow\n\t"
- "1: jmp 1b"
- : "+r" (__sp)
- : "D" ("kernel stack overflow (page fault)"),
+ ASM_CALL("movq %[stack], %%rsp\n\t"
+ "call handle_stack_overflow\n\t"
+ "1: jmp 1b",
+ OUTPUTS(),
+ INPUTS("D" ("kernel stack overflow (page fault)"),
"S" (regs), "d" (address),
- [stack] "rm" (stack));
+ [stack] "rm" (stack)));
unreachable();
}
#endif
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index de179993e039..8523591a092a 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -15,3 +15,5 @@
* with any version that can compile the kernel
*/
#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
+#undef ASM_CALL
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 16d41de92ee3..08e5f24147ed 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -128,6 +128,25 @@
#define __always_unused __attribute__((unused))
#define __mode(x) __attribute__((mode(x)))

+#ifdef CONFIG_FRAME_POINTER
+/*
+ * All x86 inline asm statements with a 'call' instruction must use this macro.
+ * It ensures that GCC sets up the containing function's frame pointer before
+ * inserting the asm.
+ *
+ * WARNING: Positional operand names ("%0") and constraints ("0" (foo)) are
+ * not allowed.
+ */
+#define ASM_CALL(str, outputs, inputs, clobbers...) \
+({ \
+ register void *__sp asm(_ASM_SP); \
+ asm volatile(str \
+ : "+r" (__sp) ARGS_APPEND(outputs) \
+ : inputs \
+ CLOBBERS_APPEND(clobbers)); \
+})
+#endif
+
/* gcc version specific checks */

#if GCC_VERSION < 30200
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c738966434c1..4843a3843103 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -649,4 +649,9 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
#define CLOBBERS_APPEND(...) \
_CLOBBERS_APPEND(HAS_ARGS(__VA_ARGS__), __VA_ARGS__)

+#ifndef ASM_CALL
+# define ASM_CALL(str, outputs, inputs, clobbers...) \
+ asm volatile(str : outputs : inputs CLOBBERS_APPEND(clobbers))
+#endif
+
#endif /* __LINUX_COMPILER_H */
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 6a1af43862df..766a00ebf80c 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -193,13 +193,8 @@ they mean, and suggestions for how to fix them.

If it's a GCC-compiled .c file, the error may be because the function
uses an inline asm() statement which has a "call" instruction. An
- asm() statement with a call instruction must declare the use of the
- stack pointer in its output operand. For example, on x86_64:
-
- register void *__sp asm("rsp");
- asm volatile("call func" : "+r" (__sp));
-
- Otherwise the stack frame may not get created before the call.
+ asm() statement with a call instruction must use the ASM_CALL macro,
+ which forces the frame pointer to be saved before the call.


2. file.o: warning: objtool: .text+0x53: unreachable instruction
@@ -221,7 +216,7 @@ they mean, and suggestions for how to fix them.
change ENDPROC to END.


-4. file.o: warning: objtool: func(): can't find starting instruction
+3. file.o: warning: objtool: func(): can't find starting instruction
or
file.o: warning: objtool: func()+0x11dd: can't decode instruction

@@ -230,7 +225,7 @@ they mean, and suggestions for how to fix them.
section like .data or .rodata.


-5. file.o: warning: objtool: func()+0x6: unsupported instruction in callable function
+4. file.o: warning: objtool: func()+0x6: unsupported instruction in callable function

This is a kernel entry/exit instruction like sysenter or iret. Such
instructions aren't allowed in a callable function, and are most
@@ -239,7 +234,7 @@ they mean, and suggestions for how to fix them.
annotated with the unwind hint macros in asm/unwind_hints.h.


-6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
+5. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame

This is a dynamic jump or a jump to an undefined symbol. Objtool
assumed it's a sibling call and detected that the frame pointer
@@ -253,7 +248,7 @@ they mean, and suggestions for how to fix them.
the unwind hint macros in asm/unwind_hints.h.


-7. file: warning: objtool: func()+0x5c: stack state mismatch
+6. file: warning: objtool: func()+0x5c: stack state mismatch

The instruction's frame pointer state is inconsistent, depending on
which execution path was taken to reach the instruction.
@@ -270,7 +265,7 @@ they mean, and suggestions for how to fix them.
asm/unwind_hints.h.


-8. file.o: warning: objtool: funcA() falls through to next function funcB()
+7. file.o: warning: objtool: funcA() falls through to next function funcB()

This means that funcA() doesn't end with a return instruction or an
unconditional jump, and that objtool has determined that the function
--
2.13.5