Re: [PATCH] reduce inlined x86 memcpy by 2 bytes

From: Denis Vlasenko
Date: Tue Mar 22 2005 - 01:54:25 EST


On Sunday 20 March 2005 15:17, Adrian Bunk wrote:
> Hi Denis,
>
> what do your benchmarks say about replacing the whole assembler code
> with a
>
> #define __memcpy __builtin_memcpy

It generates a call to the out-of-line memcpy()
if the count is not constant.

# cat t.c
extern char *a, *b;
extern int n;

void f() {
	__builtin_memcpy(a,b,n);
}

void g() {
	__builtin_memcpy(a,b,24);
}
# gcc -S -O2 --omit-frame-pointer t.c
# cat t.s
.file "t.c"
.text
.p2align 2,,3
.globl f
.type f, @function
f:
subl $16, %esp
pushl n
pushl b
pushl a
call memcpy
addl $28, %esp
ret
.size f, .-f
.p2align 2,,3
.globl g
.type g, @function
g:
pushl %edi
pushl %esi
movl a, %edi
movl b, %esi
cld
movl $6, %ecx
rep
movsl
popl %esi
popl %edi
ret
.size g, .-g
.section .note.GNU-stack,"",@progbits
.ident "GCC: (GNU) 3.4.1"

Proving that it is slower than inline is left
as an exercise for the reader :)
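
(If someone actually wants to do the exercise, a rough sketch like the
one below would do. The buffer sizes and loop counts are arbitrary, and
the volatile n is only there to force the out-of-line call:)

#include <stdio.h>
#include <string.h>

/* read the CPU timestamp counter (i386/x86-64) */
static inline unsigned long long rdtsc(void)
{
	unsigned int lo, hi;
	__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
	return ((unsigned long long)hi << 32) | lo;
}

char src[4096], dst[4096];

int main(void)
{
	volatile size_t n = 24;	/* volatile: gcc cannot treat the size as constant */
	unsigned long long t0, t1;
	int i;

	memcpy(dst, src, sizeof dst);	/* warm the caches */

	t0 = rdtsc();
	for (i = 0; i < 1000000; i++) {
		memcpy(dst, src, 24);	/* constant size: gcc can inline rep movsl */
		__asm__ __volatile__("" ::: "memory");	/* keep the copy inside the loop */
	}
	t1 = rdtsc();
	printf("constant size (inlinable): %llu cycles\n", t1 - t0);

	t0 = rdtsc();
	for (i = 0; i < 1000000; i++) {
		memcpy(dst, src, n);	/* non-constant size: out-of-line call */
		__asm__ __volatile__("" ::: "memory");
	}
	t1 = rdtsc();
	printf("variable size (call):      %llu cycles\n", t1 - t0);

	return 0;
}

Build it with -O2 and compare the two numbers; the empty asm barrier is
only there to stop gcc from hoisting the copies out of the loops.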

The kernel one is always inlined.
void h() { __memcpy(a,b,n); } compiles to:
movl n, %eax
pushl %edi
movl %eax, %ecx
pushl %esi
movl a, %edi
movl b, %esi
shrl $2, %ecx
#APP
rep ; movsl
movl %eax,%ecx
andl $3,%ecx
jz 1f
rep ; movsb
1:
#NO_APP
popl %esi
popl %edi
ret
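
For reference, the #APP..#NO_APP part is the kernel's inline asm from
include/asm-i386/string.h; reconstructed from the output above it is
roughly this shape (a sketch, the exact constraints in the tree may
differ):

static inline void *__memcpy(void *to, const void *from, size_t n)
{
	int d0, d1, d2;
	__asm__ __volatile__(
		"rep ; movsl\n\t"	/* copy n/4 dwords */
		"movl %4,%%ecx\n\t"
		"andl $3,%%ecx\n\t"
		"jz 1f\n\t"		/* skip rep movsb when n is a multiple of 4 */
		"rep ; movsb\n"		/* copy the 0-3 trailing bytes */
		"1:"
		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
		: "0" (n / 4), "g" (n), "1" ((long) to), "2" ((long) from)
		: "memory");
	return to;
}

With these constraints gcc happens to pick %eax for the "g" (n) operand,
which is why the "movl %eax,%ecx" shows up in the output above.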
--
vda
