[PATCH v2 5/6] powerpc32: cacheable_memcpy becomes memcpy

From: Christophe Leroy
Date: Tue May 19 2015 - 06:08:47 EST


cacheable_memcpy uses dcbz instruction and is more efficient than
memcpy when the destination is in RAM. If the destination is in an
io area, memcpy_toio() is normally used, not memcpy

This patch renames memcpy as generic_memcpy, and renames
cacheable_memcpy as memcpy

On MPC885, we get approximatly 7% increase of the transfer rate
on an FTP reception

Signed-off-by: Christophe Leroy <christophe.leroy@xxxxxx>
---
arch/powerpc/lib/copy_32.S | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 9262071..1d49c74 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -129,13 +129,18 @@ _GLOBAL(memset)
* We only use this version if the source and dest don't overlap.
* -- paulus.
*/
-_GLOBAL(cacheable_memcpy)
+_GLOBAL(memmove)
+ cmplw 0,r3,r4
+ bgt backwards_memcpy
+ /* fall through */
+
+_GLOBAL(memcpy)
add r7,r3,r5 /* test if the src & dst overlap */
add r8,r4,r5
cmplw 0,r4,r7
cmplw 1,r3,r8
crand 0,0,4 /* cr0.lt &= cr1.lt */
- blt memcpy /* if regions overlap */
+ blt generic_memcpy /* if regions overlap */

addi r4,r4,-4
addi r6,r3,-4
@@ -201,12 +206,7 @@ _GLOBAL(cacheable_memcpy)
bdnz 40b
65: blr

-_GLOBAL(memmove)
- cmplw 0,r3,r4
- bgt backwards_memcpy
- /* fall through */
-
-_GLOBAL(memcpy)
+_GLOBAL(generic_memcpy)
srwi. r7,r5,3
addi r6,r3,-4
addi r4,r4,-4
--
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/