Re: [PATCHv2] arm: Preserve TPIDRURW on context switch

From: Jonathan Austin
Date: Fri May 03 2013 - 05:21:40 EST


Hi André,

Will pointed me at this thread and I had a look at fixing
this up yesterday by extending his original patch...

There are a few things about this that aren't quite right. Most
of the comments are cosmetic but there's an issue in copy_thread
that will result in incorrect behaviour, I think.

I've commented below inline and there's a patch at the bottom - can
you let me know if it works for you?

On 02/05/13 20:54, André Hentschel wrote:
> Am 24.04.2013 11:42, schrieb Will Deacon:
>> Hi Andrew,
>>
>> On Tue, Apr 23, 2013 at 11:42:22PM +0100, André Hentschel wrote:
>>> Am 23.04.2013 11:15, schrieb Will Deacon:
>>>> You could introduce `get' tls functions, which don't do anything for CPUs
>>>> without the relevant registers.
>>>
>>> Before i have another round of testing and patch formatting/sending,
>>> what about the untested patch below?
>>
>> Ok. Comments inline.
>
> I answered to that separately.
> Here is another try based on your comments:
>
>
>
> diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
> index cddda1f..bb5b48d 100644
> --- a/arch/arm/include/asm/thread_info.h
> +++ b/arch/arm/include/asm/thread_info.h
> @@ -58,7 +58,7 @@ struct thread_info {
> struct cpu_context_save cpu_context; /* cpu context */
> __u32 syscall; /* syscall number */
> __u8 used_cp[16]; /* thread used copro */
> - unsigned long tp_value;
> + unsigned long tp_value[2];
> #ifdef CONFIG_CRUNCH
> struct crunch_state crunchstate;
> #endif
> diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
> index 73409e6..02f8674 100644
> --- a/arch/arm/include/asm/tls.h
> +++ b/arch/arm/include/asm/tls.h
> @@ -2,48 +2,87 @@
> #define __ASMARM_TLS_H
>
> #ifdef __ASSEMBLY__
> + .macro check_hwcap_tls, tmp1
> + ldr \tmp1, =elf_hwcap
> + ldr \tmp1, [\tmp1, #0]
> + tst \tmp1, #HWCAP_TLS @ hardware TLS available?
> + .endm
> +
> +
> + .macro get_tls_none, tp, tmp1
> + .endm
> +
> + .macro get_tls_v6k, tp, tmp1
> + mrc p15, 0, \tmp1, c13, c0, 2 @ get user r/w TLS register
> + str \tmp1, [\tp, #4]
> + .endm
> +
> + .macro get_tls_v6, tp, tmp1
> + check_hwcap_tls \tmp1

I tend to steer clear of asm that requires certain behaviour wrt
the flags, though in this case I think it's probably a sufficiently
self-contained case to be okay...

> + mrcne p15, 0, \tmp1, c13, c0, 2 @ get user r/w TLS register
> + strne \tmp1, [\tp, #4]
> + .endm
> +
> +
> .macro set_tls_none, tp, tmp1, tmp2
> .endm
>
> .macro set_tls_v6k, tp, tmp1, tmp2
> - mcr p15, 0, \tp, c13, c0, 3 @ set TLS register
> - mov \tmp1, #0
> - mcr p15, 0, \tmp1, c13, c0, 2 @ clear user r/w TLS register
> + ldrd \tmp1, \tmp2, [\tp]
> + mcr p15, 0, \tmp1, c13, c0, 3 @ set user r/o TLS register
> + mcr p15, 0, \tmp2, c13, c0, 2 @ set user r/w TLS register
> .endm
>
> .macro set_tls_v6, tp, tmp1, tmp2
> - ldr \tmp1, =elf_hwcap
> - ldr \tmp1, [\tmp1, #0]
> mov \tmp2, #0xffff0fff
> - tst \tmp1, #HWCAP_TLS @ hardware TLS available?
> - mcrne p15, 0, \tp, c13, c0, 3 @ yes, set TLS register
> - movne \tmp1, #0
> - mcrne p15, 0, \tmp1, c13, c0, 2 @ clear user r/w TLS register
> - streq \tp, [\tmp2, #-15] @ set TLS value at 0xffff0ff0
> + check_hwcap_tls \tmp1
> + ldrdne \tmp1, \tmp2, [\tp]
> + ldreq \tmp1, [\tp]
> + mcrne p15, 0, \tmp1, c13, c0, 3 @ yes, set user r/o TLS register
> + mcrne p15, 0, \tmp2, c13, c0, 2 @ set user r/w TLS register
> + streq \tmp1, [\tmp2, #-15] @ set TLS value at 0xffff0ff0
> .endm
>
> .macro set_tls_software, tp, tmp1, tmp2
> - mov \tmp1, #0xffff0fff
> - str \tp, [\tmp1, #-15] @ set TLS value at 0xffff0ff0
> + ldr \tmp1, [\tp]
> + mov \tmp2, #0xffff0fff
> + str \tmp1, [\tmp2, #-15] @ set TLS value at 0xffff0ff0
> .endm
> #endif
>
> #ifdef CONFIG_TLS_REG_EMUL
> #define tls_emu 1
> #define has_tls_reg 1
> +#define get_tls get_tls_none

This is different from the set_tls, which deals with both
tpidrurw and tpidruro, so the naming is a little inconsistent here...

> #define set_tls set_tls_none
> #elif defined(CONFIG_CPU_V6)
> #define tls_emu 0
> #define has_tls_reg (elf_hwcap & HWCAP_TLS)
> +#define get_tls get_tls_v6
> #define set_tls set_tls_v6
> #elif defined(CONFIG_CPU_32v6K)
> #define tls_emu 0
> #define has_tls_reg 1
> +#define get_tls get_tls_v6k
> #define set_tls set_tls_v6k
> #else
> #define tls_emu 0
> #define has_tls_reg 0
> +#define get_tls get_tls_none
> #define set_tls set_tls_software
> #endif
>
> +#ifndef __ASSEMBLY__
> +static inline void get_tpidrurw(unsigned long *tpidrurw)

A bit weird to have tpidrurw here but tls elsewhere - I
settled on tlsuser... (see below)

> +{
> + unsigned long t;
> +#ifdef CONFIG_TLS_REG_EMUL
> + return;
> +#endif
> + if (!has_tls_reg) return;
> + __asm__("mcr p15, 0, %0, c13, c0, 2" : : "r" (t));
> + *tpidrurw = t;
> +}
> +#endif
> +
> #endif /* __ASMARM_TLS_H */
> diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
> index 0f82098..2c892b2 100644
> --- a/arch/arm/kernel/entry-armv.S
> +++ b/arch/arm/kernel/entry-armv.S
> @@ -728,7 +728,7 @@ ENTRY(__switch_to)
> UNWIND(.fnstart )
> UNWIND(.cantunwind )
> add ip, r1, #TI_CPU_SAVE
> - ldr r3, [r2, #TI_TP_VALUE]
> + add r3, r1, #TI_TP_VALUE
> ARM( stmia ip!, {r4 - sl, fp, sp, lr} ) @ Store most regs on stack
> THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack
> THUMB( str sp, [ip], #4 )
> @@ -736,6 +736,8 @@ ENTRY(__switch_to)
> #ifdef CONFIG_CPU_USE_DOMAINS
> ldr r6, [r2, #TI_CPU_DOMAIN]
> #endif
> + get_tls r3, r4
> + add r3, r2, #TI_TP_VALUE
> set_tls r3, r4, r5
> #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
> ldr r7, [r2, #TI_TASK]
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 047d3e4..a13bbc8 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -36,6 +36,7 @@
> #include <asm/cacheflush.h>
> #include <asm/idmap.h>
> #include <asm/processor.h>
> +#include <asm/tls.h>
> #include <asm/thread_notify.h>
> #include <asm/stacktrace.h>
> #include <asm/mach/time.h>
> @@ -395,7 +396,10 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start,
> clear_ptrace_hw_breakpoint(p);
>
> if (clone_flags & CLONE_SETTLS)
> - thread->tp_value = childregs->ARM_r3;
> + {
> + thread->tp_value[0] = childregs->ARM_r3;
> + get_tpidrurw(&thread->tp_value[1]);
> + }

This isn't quite right - the re-reading of tpidrurw should
be independent of CLONE_SETTLS. We should update tpidrurw
from userspace in all cases.

The following is what I've been looking at/testing...
It works on V7 and I've build-tested it for 1136 - I would've
sent it yesterday but was getting things set up for testing on
1136 (v6 not k)

----8<-------