/*
 * test_movntq.c
 *
 * Program that tests the K7 optimized routines for copying and zeroing
 * pages (which fail on some MoBos in the kernel).
 *
 * Build:
 *   gcc -O2 -Wall -g -fomit-frame-pointer -o test_movntq test_movntq.c
 * and run on AMD K7!
 *
 * Exit status: 0 on success, 1 if a copied page differs from its source,
 * 2 if a zeroed page contains a non-zero word, 3 if an allocation failed.
 *
 * (c) Kurt Garloff, 2001-07-28, GNU GPL
 */

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096
#define NR_TESTS 4096

/* Save area handed to fxsave/fxrstor; set up by alloc_fpu_ctx(). */
void *fpu_ctx;

/* Global sink for the result of trigger_fpu(), so the division is not dead code. */
double c;

/*
 * Perform one floating-point division and store the result in `c`.
 * NOTE(review): presumably this exists so that the FPU has been used
 * before the MMX routines save and restore its state -- confirm.
 */
void trigger_fpu(void)
{
	double a = 4.3;
	double b = rand() / (float)RAND_MAX;

	c = a / b;
}

/*
 * Copy one PAGE_SIZE page from `from` to `to` with MMX loads and
 * non-temporal (movntq) stores, 64 bytes per loop iteration.
 *
 * The FPU/MMX state is saved into fpu_ctx with fxsave before the MMX
 * registers are touched and restored with fxrstor afterwards, so
 * alloc_fpu_ctx() must have been called first.
 *
 * The first loop handles PAGE_SIZE/64 - 5 blocks and prefetches 320 bytes
 * ahead of the block being copied; the second loop copies the remaining
 * 5 blocks (5 * 64 = 320 bytes) without prefetching.
 */
void movntq_copy_page0(void *to, void *from)
{
	/*
	 * The template advances both pointers, so they are declared as
	 * read-write operands ("+"); GCC does not allow an asm to modify an
	 * input-only operand.  They are also early-clobber ("&") because
	 * they change before the last use of the fpu_ctx input in fxrstor.
	 * `add` carries no size suffix so the assembler takes the operand
	 * size from the pointer register.
	 */
	__asm__ __volatile__ (
		"\n\t prefetch (%0)"
		"\n\t prefetch 64(%0)"
		"\n\t prefetch 128(%0)"
		"\n\t prefetch 192(%0)"
		"\n\t fxsave (%3)"
		"\n\t prefetch 256(%0)"
		"\n\t movl %2, %%ecx"
		"\n\t fnclex"
		"\n\t1: prefetch 320(%0)"
		"\n\t movq (%0),%%mm0"
		"\n\t movntq %%mm0,(%1)"
		"\n\t movq 8(%0),%%mm1"
		"\n\t movntq %%mm1,8(%1)"
		"\n\t movq 16(%0),%%mm2"
		"\n\t movntq %%mm2,16(%1)"
		"\n\t movq 24(%0),%%mm3"
		"\n\t movntq %%mm3,24(%1)"
		"\n\t movq 32(%0),%%mm4"
		"\n\t movntq %%mm4,32(%1)"
		"\n\t movq 40(%0),%%mm5"
		"\n\t movntq %%mm5,40(%1)"
		"\n\t movq 48(%0),%%mm6"
		"\n\t movntq %%mm6,48(%1)"
		"\n\t movq 56(%0),%%mm7"
		"\n\t movntq %%mm7,56(%1)"
		/*"\n\t sfence"*/
		"\n\t add $64,%0"
		"\n\t add $64,%1"
		"\n\t loop 1b"
		"\n\t movl $5, %%ecx"
		"\n\t2: movq (%0),%%mm0"
		"\n\t movntq %%mm0,(%1)"
		"\n\t movq 8(%0),%%mm1"
		"\n\t movntq %%mm1,8(%1)"
		"\n\t movq 16(%0),%%mm2"
		"\n\t movntq %%mm2,16(%1)"
		"\n\t movq 24(%0),%%mm3"
		"\n\t movntq %%mm3,24(%1)"
		"\n\t movq 32(%0),%%mm4"
		"\n\t movntq %%mm4,32(%1)"
		"\n\t movq 40(%0),%%mm5"
		"\n\t movntq %%mm5,40(%1)"
		"\n\t movq 48(%0),%%mm6"
		"\n\t movntq %%mm6,48(%1)"
		"\n\t movq 56(%0),%%mm7"
		"\n\t movntq %%mm7,56(%1)"
		"\n\t add $64,%0"
		"\n\t add $64,%1"
		"\n\t loop 2b"
		"\n\t sfence"
		"\n\t fxrstor (%3) \n"
		: "+&r" (from), "+&r" (to)
		: "i" (PAGE_SIZE / 64 - 5), "r" (fpu_ctx)
		: "memory", "ecx");
}

/*
 * Fill one PAGE_SIZE page at `to` with zeros using non-temporal (movntq)
 * stores of a cleared MMX register, 64 bytes per loop iteration.
 *
 * The FPU/MMX state is saved into fpu_ctx with fxsave and restored with
 * fxrstor around the loop, so alloc_fpu_ctx() must have been called first.
 */
void movntq_zero_page0(void *to)
{
	/*
	 * `to` is advanced by the template, hence a read-write early-clobber
	 * operand rather than an input (see movntq_copy_page0).
	 */
	__asm__ __volatile__ (
		"\n\t fxsave (%2)"
		"\n\t movl %1, %%ecx"
		"\n\t fnclex"
		"\n\t pxor %%mm0, %%mm0"
		"\n\t1: "
		"\n\t movntq %%mm0,(%0)"
		"\n\t movntq %%mm0,8(%0)"
		"\n\t movntq %%mm0,16(%0)"
		"\n\t movntq %%mm0,24(%0)"
		"\n\t movntq %%mm0,32(%0)"
		"\n\t movntq %%mm0,40(%0)"
		"\n\t movntq %%mm0,48(%0)"
		"\n\t movntq %%mm0,56(%0)"
		/*"\n\t sfence"*/
		"\n\t add $64,%0"
		"\n\t loop 1b"
		"\n\t sfence"
		"\n\t fxrstor (%2) \n"
		: "+&r" (to)
		: "i" (PAGE_SIZE / 64), "r" (fpu_ctx)
		: "memory", "ecx");
}

/*
 * Allocate the 1024-byte, 256-byte-aligned save area used by the
 * fxsave/fxrstor instructions and store it in fpu_ctx.
 * Exits with status 3 if the allocation fails.
 */
void alloc_fpu_ctx(void)
{
	fpu_ctx = memalign(256, 1024);
	if (fpu_ctx == NULL) {
		perror("memalign");
		exit(3);
	}
}

/* Fill the PAGE_SIZE bytes at `mem` with successive rand() values, one per int. */
void fill_rand_page(void *mem)
{
	int *word = mem;
	int *end = (int *)((char *)mem + PAGE_SIZE);

	/* do/while: the first word is always written, as before. */
	do {
		*word++ = rand();
	} while (word < end);
}

/*
 * Check that the first `ln` bytes at `mem` (taken as whole ints) are zero.
 *
 * Despite its name this function does not write to memory.
 * Returns NULL if every word is zero, otherwise a pointer to the first
 * non-zero word.
 */
void *memzero(void *mem, size_t ln)
{
	int *words = mem;
	size_t count = ln / sizeof(int);
	size_t i;

	for (i = 0; i < count; i++) {
		if (words[i] != 0)
			return &words[i];
	}
	return NULL;
}

/*
 * Test driver: fills the first page of a (NR_TESTS + 1)-page buffer with
 * random data, then NR_TESTS times copies the current page to the next
 * one, verifies the copy with memcmp, zeroes the current page, verifies
 * that it is all zero, and moves on to the next page.
 */
int main(void)
{
	char *base, *src, *dst;
	void *err;
	int i;

	srand(5);
	alloc_fpu_ctx();
	trigger_fpu();

	base = memalign(PAGE_SIZE, (NR_TESTS + 1) * PAGE_SIZE);
	if (base == NULL) {
		perror("memalign");
		exit(3);
	}
	fill_rand_page(base);

	src = base;
	for (i = 0; i < NR_TESTS; i++) {
		dst = src + PAGE_SIZE;

		movntq_copy_page0(dst, src);
		if (memcmp(src, dst, PAGE_SIZE)) {
			printf("Error (%i)!\n", i);
			exit(1);
		}

		movntq_zero_page0(src);
		if ((err = memzero(src, PAGE_SIZE))) {
			printf("Error! (%i) %p\n", i, err);
			exit(2);
		}

		src = dst;
	}

	free(base);
	free(fpu_ctx);
	return 0;
}