[PATCH 1/3 UPDATED] x86, percpu: Add 'percpu_read_stable()' interfacefor cacheable accesses

From: Tejun Heo
Date: Mon Aug 03 2009 - 11:14:05 EST


From: Linus Tolvards <torvalds@xxxxxxxxxxxxxxxxxxxx>

This is very useful for some common things like 'get_current()' and
'get_thread_info()', which can be used multiple times in a function, and
where the result is cacheable.

tj: Added the magical undocumented "P" modifier to UP __percpu_arg()
to force gcc to dereference the pointer value passed in via the
"p" input constraint. Without this, percpu_read_stable() returns
the address of the percpu variable. Also added comment explaining
the difference between percpu_read() and percpu_read_stable().

Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
Signed-off-by: H. Peter Anvin <hpa@xxxxxxxxx>
---
It was the missing magic 'P'. Patch titles have been updated as
requested. The next two patches are the same except for the subject
update. The updated patches are available in the following git tree.

git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git x86-percpu

Please note that the tree is rebased and needs to be forcibly pulled
into x86/percpu. Thanks.

arch/x86/include/asm/current.h | 2 +-
arch/x86/include/asm/percpu.h | 26 +++++++++++++++++++-------
arch/x86/include/asm/thread_info.h | 2 +-
3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index c68c361..4d447b7 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -11,7 +11,7 @@ DECLARE_PER_CPU(struct task_struct *, current_task);

static __always_inline struct task_struct *get_current(void)
{
- return percpu_read(current_task);
+ return percpu_read_stable(current_task);
}

#define current get_current()
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 103f1dd..04eacef 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -49,7 +49,7 @@
#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
#define __my_cpu_offset percpu_read(this_cpu_off)
#else
-#define __percpu_arg(x) "%" #x
+#define __percpu_arg(x) "%P" #x
#endif

/*
@@ -104,36 +104,48 @@ do { \
} \
} while (0)

-#define percpu_from_op(op, var) \
+#define percpu_from_op(op, var, constraint) \
({ \
typeof(var) ret__; \
switch (sizeof(var)) { \
case 1: \
asm(op "b "__percpu_arg(1)",%0" \
: "=q" (ret__) \
- : "m" (var)); \
+ : constraint); \
break; \
case 2: \
asm(op "w "__percpu_arg(1)",%0" \
: "=r" (ret__) \
- : "m" (var)); \
+ : constraint); \
break; \
case 4: \
asm(op "l "__percpu_arg(1)",%0" \
: "=r" (ret__) \
- : "m" (var)); \
+ : constraint); \
break; \
case 8: \
asm(op "q "__percpu_arg(1)",%0" \
: "=r" (ret__) \
- : "m" (var)); \
+ : constraint); \
break; \
default: __bad_percpu_size(); \
} \
ret__; \
})

-#define percpu_read(var) percpu_from_op("mov", per_cpu__##var)
+/*
+ * percpu_read() makes gcc load the percpu variable every time it is
+ * accessed while percpu_read_stable() allows the value to be cached.
+ * percpu_read_stable() is more efficient and can be used if its value
+ * is guaranteed to be valid across cpus. The current users include
+ * get_current() and get_thread_info() both of which are actually
+ * per-thread variables implemented as per-cpu variables and thus
+ * stable for the duration of the respective task.
+ */
+#define percpu_read(var) percpu_from_op("mov", per_cpu__##var, \
+ "m" (per_cpu__##var))
+#define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \
+ "p" (&per_cpu__##var))
#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val)
#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index fad7d40..a1bb5a1 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -213,7 +213,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
- ti = (void *)(percpu_read(kernel_stack) +
+ ti = (void *)(percpu_read_stable(kernel_stack) +
KERNEL_STACK_OFFSET - THREAD_SIZE);
return ti;
}
--
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/