Testers wanted! (was Re: "movb" for spin-unlock)

From: Oliver Xymoron (oxymoron@waste.org)
Date: Sun Apr 23 2000 - 20:17:06 EST


On Sun, 23 Apr 2000, [iso-8859-1] Jakob Østergaard wrote:

> On Sun, 23 Apr 2000, Oliver Xymoron wrote:
>
> > Below is Manfred's lock test code if people with Pentium Pros want to
> > bang on it. If it locks up, we have a problem. Set USE_MB to one to use
> > the mov-based unlock. My dual PPro is a later stepping 9 and seems to work
> > just fine. If you have an SMP system with steppings 1-8 (only 1, 2, 6, and
> > 7 should be out there) and can confirm this works for you, that'd be
> > great.
>
> From what I can see in the code, USE_MB should be set to zero for the program
> to use the mov based unlock:
>
> > #if USE_MB == 0
> > "movl $1,%1\n\t" /* set current_state = 1 */
> > #else
> > "lock;bts $0,%1\n\t"
> > #endif

Yep. I only took a quick glance at it before sending it on. It turns out
it doesn't work the way I remembered at all. When I said it worked on my
machine, that was also from memory (I was too lazy to log in to the machine
in question).

> Dual PPro stepping 1 here. I was just finishing my ``success report'' as the
> program locked up :(

Not a problem - turns out Manfred's program was testing something
different, but it started the discussion about the simpler unlock.

I've hacked it into a new variant that tests what we're actually trying to
test, and is a fair amount simpler to read (after you get past all of the
kernel cut and paste!). See racefunc(). The idea is to see whether we can
ever acquire a lock while changes made to data in the previous critical
section are still not visible.

TESTERS:

Anyone with a dual x86, please give this a quick run. I want to hear about
any failures (it'll abort with "state was 1!"). If you have a PPro with a
stepping of less than 9 (those steppings have been rumored to have a problem
with this), tell me about your successes as well. Send me a copy of your
/proc/cpuinfo too. If we can get a solid set of success reports for early
P6s and no failures elsewhere, perhaps we can get this faster locking
method into the kernel.

/*

movb.c

Compile with:

cc -O2 -o movb movb.c -lpthread

Remove the "#define MOVB" line below to test the old ("lock ; btrl")
spin_unlock variant instead of the "movb" one.

*/

#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Build-time switch: when MOVB is defined, spin_unlock_string is the plain
 * "movb $0" store; when it is not defined, spin_unlock_string is the locked
 * "lock ; btrl" instruction.
 */
#define MOVB

/*
 * mb() - full memory barrier.
 *
 * On x86 this is a locked add of zero to the word at the top of the stack:
 * the data is left unchanged, but the locked read-modify-write orders all
 * earlier loads and stores before all later ones. The "memory" clobber also
 * makes it a compiler barrier, and "cc" records that the add changes flags.
 *
 * The stack pointer register is chosen per target: addressing through %esp
 * in a 64-bit build would use only the low 32 bits of the stack pointer and
 * touch an unrelated (usually unmapped) address. On targets that are not
 * x86, the compiler's full-barrier builtin is used instead.
 */
#if defined(__x86_64__)
#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)": : :"memory", "cc")
#elif defined(__i386__)
#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory", "cc")
#else
#define mb() __sync_synchronize()
#endif

/*
 * Dummy type of 100 unsigned longs, used only to build the memory operand
 * of the inline asm in spin_lock() and spin_unlock(): __dummy_lock(lock)
 * reinterprets the lock pointer as a pointer to this struct and
 * dereferences it.
 * NOTE(review): presumably the oversized type is there so the compiler
 * treats the asm as touching a large region starting at the lock rather
 * than a single word -- confirm against the kernel source this was copied
 * from.
 */
typedef struct { unsigned long a[100]; } __dummy_lock_t;
#define __dummy_lock(lock) (*(__dummy_lock_t *)(lock))

/*
 * Spinlock object.
 *
 * lock:  the lock word. The asm strings in this file set, test and clear
 *        bit 0 of it (btsl/btrl $0, testb $1) or store 0 to its first byte
 *        (movb $0); SPIN_LOCK_UNLOCKED initializes it to 0.
 * magic: present only when SPINLOCK_DEBUG is non-zero; spin_lock() and
 *        spin_unlock() then compare it against SPINLOCK_MAGIC.
 */
typedef struct {
        volatile unsigned int lock;
#if SPINLOCK_DEBUG
        unsigned magic;
#endif
} spinlock_t;

/*
 * Initializer text for the optional `magic` member of spinlock_t: expands to
 * ", SPINLOCK_MAGIC" when SPINLOCK_DEBUG is non-zero and to nothing
 * otherwise.
 * NOTE(review): SPINLOCK_MAGIC is not defined anywhere in this file, so a
 * build with SPINLOCK_DEBUG set needs it supplied from elsewhere.
 */
#if SPINLOCK_DEBUG
#define SPINLOCK_MAGIC_INIT , SPINLOCK_MAGIC
#else
#define SPINLOCK_MAGIC_INIT /* */
#endif

/* Value of an unlocked spinlock: lock word 0 (plus the magic, if enabled). */
#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 SPINLOCK_MAGIC_INIT }

/* Reset the spinlock that x points to, to the unlocked value. */
#define spin_lock_init(x) do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)

/*
 * Assembly text for acquiring a lock; %0 is the lock's memory operand.
 *
 *   1:  lock ; btsl $0,%0   atomically set bit 0; the carry flag receives
 *                           the previous value of the bit
 *       jc 2f               bit was already set: go wait at label 2
 *   (label 2 is emitted into the separate section .text.lock)
 *   2:  testb $1,%0         plain read of bit 0
 *       rep;nop             spin-wait hint; leaves the flags from testb intact
 *       jne 2b              bit still set: keep waiting
 *       jmp 1b              bit clear: go back and retry the atomic set
 *   .previous               switch back to the section in use before
 */
#define spin_lock_string \
        "\n1:\t" \
        "lock ; btsl $0,%0\n\t" \
        "jc 2f\n" \
        ".section .text.lock,\"ax\"\n" \
        "2:\t" \
        "testb $1,%0\n\t" \
        "rep;nop\n\t" \
        "jne 2b\n\t" \
        "jmp 1b\n" \
        ".previous"

/*
 * Assembly text for releasing a lock; %0 is the lock's memory operand.
 *
 * With MOVB defined the release is an ordinary byte store of 0 (the variant
 * this program is meant to test). Without MOVB it is the older form, a
 * locked bit-test-and-reset of bit 0.
 */
#ifdef MOVB
#define spin_unlock_string \
        "movb $0,%0"
#else
#define spin_unlock_string \
        "lock ; btrl $0,%0"
#endif

/*
 * spin_lock - acquire `lock`, busy-waiting until it is free.
 *
 * Executes spin_lock_string on the lock's memory. With SPINLOCK_DEBUG set,
 * the lock's magic value is checked first (that path relies on printk(),
 * BUG() and SPINLOCK_MAGIC, none of which are defined in this file).
 *
 * Notes on the declaration and the asm operands:
 *  - static inline rather than extern inline, so that a callable copy
 *    exists in this file whenever the compiler decides not to inline
 *    (for example when built without optimization).
 *  - the lock is a "+m" operand because the asm both reads and writes it;
 *    an output-only "=m" would tell the compiler the old value is dead.
 *  - the "memory" clobber keeps the compiler from moving accesses to the
 *    protected data above the acquire; "cc" records that flags change.
 */
static inline void spin_lock(spinlock_t *lock)
{
#if SPINLOCK_DEBUG
        __label__ here;
here:
        if (lock->magic != SPINLOCK_MAGIC) {
                printk("eip: %p\n", &&here);
                BUG();
        }
#endif
        __asm__ __volatile__(
                spin_lock_string
                : "+m" (__dummy_lock(lock))
                :
                : "memory", "cc");
}

/*
 * spin_unlock - release `lock`.
 *
 * Executes spin_unlock_string on the lock's memory: either a plain byte
 * store of 0 or a locked bit reset, depending on MOVB. With SPINLOCK_DEBUG
 * set, the magic value is checked and releasing a lock that is not held is
 * treated as a bug (that path relies on BUG() and SPINLOCK_MAGIC, which are
 * not defined in this file).
 *
 * Notes on the declaration and the asm operands:
 *  - static inline rather than extern inline, so that a callable copy
 *    exists in this file whenever the compiler decides not to inline.
 *  - the lock is a "+m" operand: the btrl variant reads as well as writes
 *    it, and the constraint is harmless for the movb variant.
 *  - the "memory" clobber keeps the compiler from sinking critical-section
 *    loads and stores below the release; "cc" covers the flags changed by
 *    btrl.
 */
static inline void spin_unlock(spinlock_t *lock)
{
#if SPINLOCK_DEBUG
        if (lock->magic != SPINLOCK_MAGIC)
                BUG();
        if (!lock->lock)
                BUG();
#endif
        __asm__ __volatile__(
                spin_unlock_string
                : "+m" (__dummy_lock(lock))
                :
                : "memory", "cc");
}

/* Function type of a thread entry point as taken by pthread_create(). */
typedef void *threadfunc(void *);

/*
 * start_thread - run `f` in a new thread.
 *
 * The thread is created with default attributes and a NULL argument. The
 * thread handle is discarded, so the thread is never joined.
 *
 * If the thread cannot be created the process is aborted. This is done with
 * an explicit check rather than assert(0), which compiles to nothing when
 * NDEBUG is defined and would let the test carry on with a single thread
 * and report a meaningless success.
 */
void start_thread(threadfunc *f)
{
        pthread_t thread;
        int res = pthread_create(&thread, NULL, f, NULL);

        if (res != 0) {
                fprintf(stderr, "pthread_create failed: error %d\n", res);
                abort();
        }
}

/* The single lock that both threads contend for in racefunc(). */
static spinlock_t testlock = SPIN_LOCK_UNLOCKED;
/*
 * Shared flag that racefunc() reads and writes while holding testlock;
 * it is set back to 0 before every unlock.
 */
volatile int state=0;

/*
 * One pass through the critical section guarded by testlock.
 *
 * After taking the lock, the shared `state` is read. A non-zero value is
 * reported as "state was %d!" and the process exits with exit(-1).
 * Otherwise state is set to 1, mb() is issued, a short delay loop runs,
 * state is set back to 0, and only then is the lock released.
 *
 * Every holder restores state to 0 before unlocking, so reading a non-zero
 * state right after acquiring the lock means the lock was seen as free
 * while the previous holder's last store was still not visible.
 */
void racefunc()
{
        int i, j;
        volatile int delay=50;  /* volatile so the empty delay loop is kept */

        spin_lock(&testlock);
        
        /* state must be 0 here if the previous critical section is visible */
        i=state;
        if(i) {
                printf("state was %d!\n", i);
                exit(-1);
        }
        
        /* force change of state on bus */
        state=1;
        mb();
        /* hold the lock for a while with state == 1 */
        for(j=0;j<delay;j++)
                ;

        /* swap next two lines to demonstrate race */
        state=0;
        spin_unlock(&testlock);
}

/*
 * cpu1 - bounded side of the test; main() calls it directly.
 *
 * Calls racefunc() 5,000,000 times. Before each call it spins for `delay`
 * iterations; delay starts at 1 and is increased by one on every 5000th
 * pass (after printing a progress line), so the two threads meet at the
 * lock with steadily shifting timing. When all passes are done it prints a
 * completion message and ends the whole process with exit(0), which also
 * stops the other thread.
 *
 * param: thread argument, printed as a number in the final message. It is
 *        converted through long rather than cast straight to int, which
 *        would truncate the pointer where pointers are wider than int.
 *
 * Does not return.
 */
void *cpu1(void *param)
{
        int i, j;
        volatile int delay = 1;  /* volatile so the empty delay loop is kept */

        for (i = 0; i < 5000000; i++) {

                for (j = 0; j < delay; j++)
                        ;

                racefunc();

                if ((i % 5000) == 0) {
                        printf("delay %d: ok\n", delay);
                        delay++;
                }
        }

        printf("thread %ld finished.\n", (long)param);
        exit(0);
}

/*
 * cpu2 - unbounded side of the test; started as a thread by main().
 *
 * Loops forever: spin for a fixed 300 iterations, then call racefunc().
 * It never returns; the thread ends when the process exits.
 *
 * param: unused thread argument.
 */
void *cpu2(void *param)
{
        volatile int delay2 = 300;  /* volatile so the empty delay loop is kept */
        int j;

        (void)param;

        for (;;) {
                for (j = 0; j < delay2; j++)
                        ;

                racefunc();
        }
}

/*
 * Program entry point: print a banner, start cpu2() in a second thread,
 * then run cpu1() on the calling thread.
 */
int main(void)
{
        printf("movb:\n");
        printf(" starting, please wait.\n");
        start_thread(cpu2);
        cpu1(NULL);
        return 0;  /* not reached: cpu1() ends the process with exit(0) */
}

--
 "Love the dolphins," she advised him. "Write by W.A.S.T.E.." 

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sun Apr 23 2000 - 21:00:22 EST