Re: Shared memory not SMP safe to user-mode code.

Richard B. Johnson (root@chaos.analogic.com)
Tue, 30 Nov 1999 15:54:43 -0500 (EST)


On Tue, 30 Nov 1999, Alan Cox wrote:

> > Currently, if you were to do the trivial:
> >
> > *ptr = shared_memory;
> >
> > while (!*ptr)
> > ;
> >
> > This will loop forever, even when hardware writes non-zero to shared
> > memory. This is because the CPU "knows" that it didn't write something
>
> If *ptr is in PCI space it will be uncached, if it is in main memory it will
> cause a cache line invalidation on the CPUs holding the line. Any CPU holding
> the line in E or S state will go to I, a CPU holding the line in M state
> may block the write and write its data from cache to main memory then go I
> and allow the transaction to continue.
>
> On non PC hardware things can of course be very different indeed.
>

Well it doesn't happen on any of the SMP machines I have tried it on.
There is, however, a software fix.

The following code demonstrates that locking using a key value and
a lock variable works (any Jr. CS major knows that). It also shows
that the cache is not updated unless some drastic steps are taken
(see function flush(), which does a "far" return).

In the new code, I use the rdtsc counter for a new key value so
reviewers are not confused. It does not care about interrupts. They
remain enabled.

Now, the test. If you execute this, it will run "forever" with no
problems. However, if you comment-out flush() (or redefine it to do
nothing), it will eventually loop with an impossible lock variable.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include <sys/wait.h>
#include <netinet/in.h>
#include <fcntl.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>

extern void iopl(int);

/*
 * Layout of the data at the start of the shared memory segment.
 * init_shmem() casts the address returned by shmat() to a pointer to
 * this type, and main() clears the segment before forking.
 *
 * NOTE(review): every member is an unsigned int, so the packed attribute
 * presumably does not change the layout -- confirm it is needed.
 */
typedef struct {
unsigned int pad0; /* not written after main()'s memset(); printed by the child on a failed lock attempt */
unsigned int pad1; /* same as pad0 */
unsigned int pad2; /* same as pad0 */
unsigned int i; /* incremented by the parent while it holds the lock; compared with j by the child */
unsigned int j; /* incremented by the parent right after i */
unsigned int spin; /* lock word: 0 when free; each process adds its key to claim it and subtracts it to release */
unsigned int pad3; /* same as pad0 */
unsigned int pad4; /* not referenced by the visible code */
} PARS __attribute__((packed));

/* System V IPC key passed to shmget() to identify the shared segment. */
#define KEY 0xdeadface
/* Size in bytes requested from shmget() and cleared by main(). */
#define PAGE_SIZE 0x1000
/* shmget() flags for the first attempt: create the segment, fail if it already exists. */
#define FIRST_FLAGS (IPC_EXCL|IPC_CREAT|SHM_R|SHM_W)
/* shmget() flags for the second attempt, used when the first one fails. */
#define NEXT_FLAGS (IPC_EXCL|SHM_R|SHM_W)
/* mmap()-style protection and mapping flags; not referenced by the visible code. */
#define PROT (PROT_READ|PROT_WRITE)
#define FLAGS (MAP_FIXED|MAP_SHARED)
/*
 * Fallback for headers that do not provide MAP_FAILED.  The expansion is
 * fully parenthesised so it behaves as a single operand in any expression
 * (e.g. `sizeof MAP_FAILED`).
 */
#if !defined(MAP_FAILED)
#define MAP_FAILED ((void *) -1)
#endif

/*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
/*
 * Returns the low 32 bits of the CPU time-stamp counter, read with the
 * x86 RDTSC instruction.  main() uses the value as a per-attempt lock key.
 *
 * RDTSC leaves the low half of the counter in EAX and the high half in
 * EDX.  Both are declared as outputs ("=a" / "=d") so the compiler knows
 * the two registers are overwritten; the high half is then discarded.
 * The value is unsigned so it can be returned without a sign conversion.
 *
 * NOTE(review): the counter keeps advancing, so back-to-back calls on one
 * CPU are expected to differ, but two CPUs could in principle read the
 * same low word -- uniqueness across processes is not guaranteed here.
 */
static __inline__ unsigned int unique(void)
{
    unsigned int lo;
    unsigned int hi;

    __asm__ __volatile__("rdtsc"
                         : "=a" (lo), "=d" (hi)
                         : /* No inputs */);
    (void) hi;
    return lo;
}
/*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
/*
 * Performs a far return to the very next instruction: the current code
 * segment selector and the address of local label 1 are pushed, then a
 * far return pops both, reloading CS and continuing at the label.
 *
 * Two details matter for this to work:
 *   - the label's ADDRESS must be pushed ("$1f" / lea), not the memory
 *     contents found at the label;
 *   - the return must be a FAR return (lret), otherwise the pushed CS
 *     value is left behind on the stack.
 *
 * The "memory" clobber additionally makes the statement a compiler
 * barrier, so values read from memory before the call are re-read after.
 *
 * On x86-64 the stack pointer is first moved below the 128-byte red zone
 * so the pushes cannot overwrite the caller's locals.
 *
 * NOTE(review): the accompanying message credits this sequence with a
 * cache-flushing effect; nothing in this code establishes that.
 */
static __inline__ void flush(void)
{
#if defined(__x86_64__)
    __asm__ __volatile__("leaq -128(%%rsp), %%rsp\n"
                         "\tmov %%cs, %%eax\n"
                         "\tpushq %%rax\n"
                         "\tleaq 1f(%%rip), %%rax\n"
                         "\tpushq %%rax\n"
                         "\tlretq\n"
                         "1:\n"
                         "\tleaq 128(%%rsp), %%rsp\n"
                         : /* No outputs */
                         : /* No inputs */
                         : "rax", "memory");
#elif defined(__i386__)
    __asm__ __volatile__("pushl %%cs\n"
                         "\tpushl $1f\n"
                         "\tlret\n"
                         "1:\n"
                         : /* No outputs */
                         : /* No inputs */
                         : "memory");
#else
#error "flush() is only implemented for x86 targets"
#endif
}
/*
 * Reports a failed call on stderr and terminates the process.
 *
 *   s - name of the call that failed (a string); printed followed by "()".
 *
 * The message carries the source line and file of the expansion and the
 * strerror() text for the current errno.  The body is wrapped in
 * do { ... } while (0) so that `ERRORS("x");` is exactly one statement
 * and can safely be used as the unbraced body of an if/else.
 */
#define ERRORS(s) \
    do { \
        fprintf(stderr, "Error from line %d, file %s, call %s() (%s)\n", \
                __LINE__, __FILE__, (s), strerror(errno)); \
        exit(EXIT_FAILURE); \
    } while (0)

/*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
/*
 * Attaches the shared memory segment identified by KEY and stores its
 * address in *pars.  Nothing is done when *pars is already non-NULL.
 *
 * The segment is first requested with FIRST_FLAGS; if that fails the
 * request is repeated with NEXT_FLAGS.  A failure of the second shmget()
 * or of shmat() is reported through ERRORS(), which exits the process.
 */
void init_shmem(PARS **pars)
{
    int shmid;

    if (*pars != NULL)
        return;

    shmid = shmget(KEY, PAGE_SIZE, FIRST_FLAGS);
    if (shmid < 0) {
        shmid = shmget(KEY, PAGE_SIZE, NEXT_FLAGS);
        if (shmid < 0) {
            ERRORS("shmget");
        }
    }

    *pars = (PARS *) shmat(shmid, NULL, 0);
    if (*pars == MAP_FAILED) {
        ERRORS("shmat");
    }
}
/*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/
/*
 * Detaches the shared memory segment addressed by *pars and resets
 * *pars to NULL.  A shmdt() failure is reported through ERRORS(), which
 * exits the process.
 */
void quit_shmem(PARS **pars)
{
    const void *segment = *pars;
    int status = shmdt(segment);

    if (status < 0) {
        ERRORS("shmdt");
    }
    *pars = NULL;
}
/*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/

/*
 * Test driver.  Attaches and clears the shared segment, then forks.
 * Both processes loop forever taking a lock built on pars->spin:
 *
 *   1. draw a key with unique() and wait until spin reads 0;
 *   2. add the key to spin;
 *   3. if spin now equals the key the lock is held, otherwise back off:
 *      call flush(), subtract the key again, report, sleep and retry.
 *
 * While holding the lock the parent increments i and then j; the child
 * checks that it never observes i < j.
 *
 * The shared fields are accessed through a volatile-qualified pointer so
 * that every test of spin is a real load from the segment, and a key is
 * always drawn before it is used (spin is 0 on the first pass, so a
 * plain `while (spin) key = unique();` would leave the key unset).
 *
 * NOTE(review): `spin += key` and `spin -= key` are ordinary
 * read-modify-write sequences, not atomic operations, so the two
 * processes can interleave inside them.  That alone may account for an
 * "impossible" lock value -- confirm before attributing it to caching.
 */
int main()
{
    PARS *pars = NULL;
    volatile PARS *shm;
    volatile unsigned int key;

    init_shmem(&pars);
    iopl(3);
    memset(pars, 0x00, PAGE_SIZE);
    shm = pars;
    fprintf(stderr, "Spin = %u\n", shm->spin);
    fprintf(stderr, "J = %u\n", shm->j);
    fprintf(stderr, "I = %u\n", shm->i);

    switch(fork())
    {
    case 0:
        /* Child: drop the inherited attachment and attach again. */
        quit_shmem(&pars);
        init_shmem(&pars);
        shm = pars;
        fprintf(stderr, "CHILD\n");
        fprintf(stderr, "Spin = %u\n", shm->spin);
        fprintf(stderr, "J = %u\n", shm->j);
        fprintf(stderr, "I = %u\n", shm->i);
        for(;;)
        {
            /* Draw a fresh key at least once, and again while the lock is held. */
            do {
                key = unique();
            } while(shm->spin);
            shm->spin += key;
            if(shm->spin != key)
            {
                /* Somebody else's key is in the lock word too: back off. */
                flush();
                shm->spin -= key;
                fprintf(stderr, " Child spinning with %u\n", shm->spin);
                fprintf(stderr, " Key value was %u\n", key);
                fprintf(stderr, "Pad0 = %u\n", shm->pad0);
                fprintf(stderr, "Pad1 = %u\n", shm->pad1);
                fprintf(stderr, "Pad2 = %u\n", shm->pad2);
                fprintf(stderr, "Pad3 = %u\n", shm->pad3);
                usleep(rand() % 10000);
                continue;
            }
            /* Lock held: the parent bumps i before j, so i < j is unexpected. */
            if(shm->i < shm->j)
                fprintf(stderr, "i = %08x, j = %08x\n", shm->i, shm->j);
            else
                fprintf(stderr, "Good\n");
            shm->spin -= key;
        }
    case -1:
        ERRORS("fork");
    default:
        fprintf(stderr, "PARENT\n");
        fprintf(stderr, "Spin = %u\n", shm->spin);
        fprintf(stderr, "J = %u\n", shm->j);
        fprintf(stderr, "I = %u\n", shm->i);
        for(;;)
        {
            /* Draw a fresh key at least once, and again while the lock is held. */
            do {
                key = unique();
            } while(shm->spin);
            shm->spin += key;
            if(shm->spin != key)
            {
                /* Somebody else's key is in the lock word too: back off. */
                flush();
                shm->spin -= key;
                fprintf(stderr, "Parent spinning with %u\n", shm->spin);
                fprintf(stderr, " Key value was %u\n", key);
                usleep(rand() % 10000);
                continue;
            }
            /* Lock held: update both counters, then release. */
            shm->i++;
            shm->j++;
            shm->spin -= key;
        }
    }
    return 0;
}

Cheers,
Dick Johnson

Penguin : Linux version 2.3.13 on an i686 machine (400.59 BogoMips).
Warning : It's hard to remain at the trailing edge of technology.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/