PowerPC fastpaths for mutex subsystem

From: Joel Schopp
Date: Sat Jan 07 2006 - 12:48:12 EST


This is the second pass at optimizing the fastpath for the new mutex subsystem on PowerPC. I think it is ready to be included in the series with the other mutex patches now. Tested on a 4 core (2 SMT threads/core) Power5 machine with gcc 3.3.2.

Test results from synchro-test.ko:

All tests run for default 5 seconds
Threads semaphores mutexes mutexes+attached
1 63,465,364 58,404,630 62,109,571
4 58,424,282 35,541,297 37,820,794
8 40,731,668 35,541,297 40,281,768
16 38,372,769 37,256,298 41,751,764
32 38,406,895 36,933,675 38,731,571
64 37,232,017 36,222,480 40,766,379

Signed-off-by: Joel Schopp <jschopp@xxxxxxxxxxxxxx>

Index: 2.6.15-mutex14/include/asm-powerpc/mutex.h
===================================================================
--- 2.6.15-mutex14.orig/include/asm-powerpc/mutex.h 2006-01-04 14:46:31.000000000 -0600
+++ 2.6.15-mutex14/include/asm-powerpc/mutex.h 2006-01-06 17:36:09.000000000 -0600
@@ -1,9 +1,84 @@
/*
- * Pull in the generic implementation for the mutex fastpath.
+ * include/asm-powerpc/mutex.h
*
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
+ * PowerPC optimized mutex locking primitives
+ *
+ * Please look into asm-generic/mutex-xchg.h for a formal definition.
+ * Copyright (C) 2006 Joel Schopp <jschopp@xxxxxxxxxxxxxx>, IBM
*/
+#ifndef _ASM_MUTEX_H
+#define _ASM_MUTEX_H
+#define __mutex_fastpath_lock(count, fail_fn)\
+do{ \
+ int tmp; \
+ __asm__ __volatile__( \
+"1: lwarx %0,0,%1\n" \
+" addic %0,%0,-1\n" \
+" stwcx. %0,0,%1\n" \
+" bne- 1b\n" \
+ ISYNC_ON_SMP \
+ : "=&r" (tmp) \
+ : "r" (&(count)->counter) \
+ : "cr0", "memory"); \
+ if (unlikely(tmp < 0)) \
+ fail_fn(count); \
+} while (0)
+
+#define __mutex_fastpath_unlock(count, fail_fn)\
+do{ \
+ int tmp; \
+ __asm__ __volatile__(SYNC_ON_SMP\
+"1: lwarx %0,0,%1\n" \
+" addic %0,%0,1\n" \
+" stwcx. %0,0,%1\n" \
+" bne- 1b\n" \
+ : "=&r" (tmp) \
+ : "r" (&(count)->counter) \
+ : "cr0", "memory"); \
+ if (unlikely(tmp <= 0)) \
+ fail_fn(count); \
+} while (0)
+
+
+static inline int
+__mutex_fastpath_trylock(atomic_t* count, int (*fail_fn)(atomic_t*))
+{
+ int tmp;
+ __asm__ __volatile__(
+"1: lwarx %0,0,%1\n"
+" cmpwi 0,%0,1\n"
+" bne- 2f\n"
+" addic %0,%0,-1\n"
+" stwcx. %0,0,%1\n"
+" bne- 1b\n"
+" isync\n"
+"2:"
+ : "=&r" (tmp)
+ : "r" (&(count)->counter)
+ : "cr0", "memory");
+
+ return (int)tmp;
+
+}
+
+#define __mutex_slowpath_needs_to_unlock() 1

-#include <asm-generic/mutex-dec.h>
+static inline int
+__mutex_fastpath_lock_retval(atomic_t* count, int (*fail_fn)(atomic_t *))
+{
+ int tmp;
+ __asm__ __volatile__(
+"1: lwarx %0,0,%1\n"
+" addic %0,%0,-1\n"
+" stwcx. %0,0,%1\n"
+" bne- 1b\n"
+" isync \n"
+ : "=&r" (tmp)
+ : "r" (&(count)->counter)
+ : "cr0", "memory");
+ if (unlikely(tmp < 0))
+ return fail_fn(count);
+ else
+ return 0;
+}
+#endif