Re: [patch 2.6.17-rc5 1/2] i386 memcpy: use as few moves as possible for I/O

From: H. Peter Anvin
Date: Tue May 30 2006 - 23:44:35 EST


Chuck Ebbert wrote:
Chris Lesiak reported that changes to i386's __memcpy() broke his device
because it can't handle byte moves and the new code uses them for
all trailing bytes when the length is not divisible by four. The old
code tried to use a 16-bit move and/or a byte move as needed.

H. Peter Anvin:
"There are only a few semantics that make sense: fixed 8, 16, 32, or 64
bits, plus "optimal"; the latter to be used for anything that doesn't
require a specific transfer size. Logically, an unqualified
"memcpy_to/fromio" should be the optimal size (as few transfers as
possible)"

So add back the old code as __minimal_memcpy and have IO transfers
use that.


I was thinking some more about that, and I suspect the "right" way to do this looks something like the attached code. Note that it is asymmetric, and that it's probably too long to inline.

I haven't tested this yet, and I probably won't have time to do so this evening.

-hpa

/*
* arch/i386/lib/memcpy_io.S
*
* The most general form of memory copy to/from I/O space, used for
* devices which can handle arbitrary transactions with appropriate
* handling of byte enables. The goal is to produce the minimum
* number of naturally aligned transactions on the bus.
*/

#include <linux/config.h>

/*
 * memcpy_toio(dst, src, count)
 *
 * Copy count bytes from the source buffer to the destination using the
 * fewest naturally aligned transfers.  Alignment is decided by the
 * destination address (%edi), which is the I/O side for this routine:
 * at most one byte and one word are moved to reach a 4-byte boundary,
 * then as many dwords as fit, then at most one word and one byte.
 *
 * Arguments: with CONFIG_REGPARM they arrive in %eax (dst), %edx (src)
 * and %ecx (count); otherwise they are read from the stack.
 * Preserves %esi and %edi; may clobber %ecx, %edx and the flags.
 * The string moves rely on the direction flag being clear on entry
 * (assumed to be guaranteed by the caller, as for compiled C code).
 */
	.globl	memcpy_toio
	.type	memcpy_toio, @function

memcpy_toio:
	pushl	%edi
	pushl	%esi

#ifdef CONFIG_REGPARM
	movl	%eax, %edi		/* dst */
	movl	%edx, %esi		/* src; count is already in %ecx */
#else
	/* Two registers were pushed, so the first argument is at 12(%esp). */
	movl	12(%esp), %edi		/* dst */
	movl	16(%esp), %esi		/* src */
	movl	20(%esp), %ecx		/* count */
#endif

	jecxz	.Ltoio_done		/* nothing to copy */

	/* Odd destination: one byte brings it to a 2-byte boundary. */
	testl	$1, %edi
	jz	.Ltoio_even
	movsb
	decl	%ecx
.Ltoio_even:
	/* Fewer than two bytes left: only a possible tail byte remains. */
	cmpl	$2, %ecx
	jb	.Ltoio_tail_byte

	/* Destination at 2 mod 4: one word brings it to a 4-byte boundary. */
	testl	$2, %edi
	jz	.Ltoio_aligned
	movsw
	subl	$2, %ecx
.Ltoio_aligned:
	movl	%ecx, %edx		/* keep the byte count for the tail */
	shrl	$2, %ecx		/* number of whole dwords */
	jz	.Ltoio_tail
	rep ; movsl
.Ltoio_tail:
	movl	%edx, %ecx		/* low two bits = bytes still to move */
	testb	$2, %cl
	jz	.Ltoio_tail_byte
	movsw
.Ltoio_tail_byte:
	testb	$1, %cl
	jz	.Ltoio_done
	movsb
.Ltoio_done:
	popl	%esi
	popl	%edi
	ret

	.size	memcpy_toio, .-memcpy_toio

/*
 * memcpy_fromio(dst, src, count)
 *
 * Copy count bytes from the source to the destination buffer using the
 * fewest naturally aligned transfers.  Alignment is decided by the
 * source address (%esi), which is the I/O side for this routine:
 * at most one byte and one word are moved to reach a 4-byte boundary,
 * then as many dwords as fit, then at most one word and one byte.
 *
 * Arguments: with CONFIG_REGPARM they arrive in %eax (dst), %edx (src)
 * and %ecx (count); otherwise they are read from the stack.
 * Preserves %esi and %edi; may clobber %ecx, %edx and the flags.
 * The string moves rely on the direction flag being clear on entry
 * (assumed to be guaranteed by the caller, as for compiled C code).
 */
	.globl	memcpy_fromio
	.type	memcpy_fromio, @function

memcpy_fromio:
	pushl	%edi
	pushl	%esi

#ifdef CONFIG_REGPARM
	movl	%eax, %edi		/* dst */
	movl	%edx, %esi		/* src; count is already in %ecx */
#else
	/* Two registers were pushed, so the first argument is at 12(%esp). */
	movl	12(%esp), %edi		/* dst */
	movl	16(%esp), %esi		/* src */
	movl	20(%esp), %ecx		/* count */
#endif

	jecxz	.Lfromio_done		/* nothing to copy */

	/* Odd source: one byte brings it to a 2-byte boundary. */
	testl	$1, %esi
	jz	.Lfromio_even
	movsb
	decl	%ecx
.Lfromio_even:
	/* Fewer than two bytes left: only a possible tail byte remains. */
	cmpl	$2, %ecx
	jb	.Lfromio_tail_byte

	/* Source at 2 mod 4: one word brings it to a 4-byte boundary. */
	testl	$2, %esi
	jz	.Lfromio_aligned
	movsw
	subl	$2, %ecx
.Lfromio_aligned:
	movl	%ecx, %edx		/* keep the byte count for the tail */
	shrl	$2, %ecx		/* number of whole dwords */
	jz	.Lfromio_tail
	rep ; movsl
.Lfromio_tail:
	movl	%edx, %ecx		/* low two bits = bytes still to move */
	testb	$2, %cl
	jz	.Lfromio_tail_byte
	movsw
.Lfromio_tail_byte:
	testb	$1, %cl
	jz	.Lfromio_done
	movsb
.Lfromio_done:
	popl	%esi
	popl	%edi
	ret

	.size	memcpy_fromio, .-memcpy_fromio