Re: PATCH: Altivec support for RAID-6

From: Antonio Vargas
Date: Fri Nov 19 2004 - 16:03:44 EST


On Fri, 19 Nov 2004 13:04:11 +0100, Antonio Vargas <windenntw@xxxxxxxxx> wrote:
> On Fri, 19 Nov 2004 05:05:54 +0000 (UTC), H. Peter Anvin <hpa@xxxxxxxxx> wrote:
> > Followup to: <69304d1104111812234656a606@xxxxxxxxxxxxxx>
> > By author: Antonio Vargas <windenntw@xxxxxxxxx>
> > In newsgroup: linux.dev.kernel
> > >
> > > hpa, are you aware of any other routines which should benefit from altivec?
> > >
> >
> > Presumably the XOR code used by RAID-5, and quite possibly some of the
> > cryptography stuff. Unlike most SIMD instruction sets, it should be
> > possible to write AES using Altivec.
>
> I'll take a crack at the RAID-5 stuff first then.
>

Here we go... this file is compile-tested with gcc 3.3 from userspace;
it can serve as a starting point for testing.

Signed-off-by: Antonio Vargas <windenntw@xxxxxxxxx>

--
Greetz, Antonio Vargas aka winden of network

Las cosas no son lo que parecen, excepto cuando parecen lo que si son.
/*
* include/asm-ppc/xor.h
*
* Altivec optimized RAID-5 checksumming functions.
* Copyright 2004 Antonio Vargas, windenntw@xxxxxxxxx
*
* Based on include/asm-generic/xor.h and drivers/md/raid6altivec.uc
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* You should have received a copy of the GNU General Public License
* (for example /usr/src/linux/COPYING); if not, write to the Free
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifdef KERNEL
#include <asm-generic/xor.h>
#endif

#ifdef CONFIG_ALTIVEC

#ifdef KERNEL
#include <altivec.h>
#include <asm/cputable.h>
#include <asm/processor.h>
#include <asm/system.h>
#else
/*
 * Userspace test build: the kernel services used by BODY are replaced by
 * no-ops.  The prefetch hints expand to nothing (their argument is
 * discarded, not evaluated), and the preemption / AltiVec-enable hooks are
 * empty functions.
 *
 * The stubs are static inline with a real (void) prototype so that this
 * header can be included from more than one translation unit without
 * multiple-definition link errors, and so that stray arguments are
 * diagnosed by the compiler.
 */
#define prefetch(x)
#define prefetchw(x)
static inline void enable_kernel_altivec(void) {}
static inline void preempt_enable(void) {}
static inline void preempt_disable(void) {}
#endif

/* Unit of work for the XOR routines below: one AltiVec vector of unsigned chars. */
typedef vector unsigned char unative_t;

/*
 * BODY - shared loop for every xor_altivecN() function.
 *
 * Expects the enclosing function to provide `bytes` and the pointers used by
 * the currently selected PREF(x), CALC(x) and INC macros.  One "line" is four
 * unative_t elements; exactly bytes / (4 * sizeof(unative_t)) lines are
 * processed, any remainder is ignored.
 *
 * Every line but the last prefetches the following line (PREF(4)) before
 * computing; the last line is computed without a prefetch so no hint is
 * issued past the end of the buffers.  If `bytes` holds less than one full
 * line nothing is touched at all (the previous do/while + unconditional tail
 * always processed at least two lines and so overran short buffers).
 *
 * PREF and INC carry their own trailing semicolons, hence none is added here.
 */
#define BODY \
	long lines = bytes / (4 * sizeof(unative_t)); \
	int i; \
	if (lines > 0) { \
		preempt_disable(); \
		enable_kernel_altivec(); \
		PREF(0) \
		while (--lines > 0) { \
			PREF(4) \
			CALC(0); \
			CALC(1); \
			CALC(2); \
			CALC(3); \
			INC \
		} \
		for (i = 0; i < 4; i++) \
			CALC(i); \
		preempt_enable(); \
	}


/*
 * Building blocks selected by each xor_altivecN() as PREF / CALC / INC.
 * Each N-operand variant is built textually on top of the (N-1) variant.
 *
 * PREFn(x): prefetch element offset x of every operand; p1 is the one that
 *           gets written, so it uses prefetchw().  The argument is
 *           parenthesized so an expression such as `a << 1` is applied as an
 *           offset rather than binding to the pointer addition.
 *           The expansion ends in ';' - callers must not rely on adding one.
 */
#define PREF2(x) prefetchw(p1 + (x)); prefetch(p2 + (x));
#define PREF3(x) PREF2(x) prefetch(p3 + (x));
#define PREF4(x) PREF3(x) prefetch(p4 + (x));
#define PREF5(x) PREF4(x) prefetch(p5 + (x));

/*
 * CALCn(x): p1[x] ^= p2[x] ^ ... ^ pn[x].  Deliberately left as an open
 *           expression (no outer parentheses, no ';') so the next variant can
 *           append another "^ pk[x]" to the right-hand side.
 */
#define CALC2(x) p1[x] ^= p2[x]
#define CALC3(x) CALC2(x) ^ p3[x]
#define CALC4(x) CALC3(x) ^ p4[x]
#define CALC5(x) CALC4(x) ^ p5[x]

/* INCn: advance every operand pointer by one line (four elements). */
#define INC2 p1 += 4; p2 += 4;
#define INC3 INC2 p3 += 4;
#define INC4 INC3 p4 += 4;
#define INC5 INC4 p5 += 4;

/*
 * xor_altivec2 - XOR the p2 buffer into the p1 buffer, in place.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first source operand
 * @p2:    second source operand
 *
 * Selects the two-operand PREF/CALC/INC variants; the loop itself is
 * supplied by BODY.
 */
#undef PREF
#undef CALC
#undef INC
#define INC INC2
#define CALC(x) CALC2(x)
#define PREF(x) PREF2(x)
static void xor_altivec2(unsigned long bytes, unative_t *p1, unative_t *p2)
{
	BODY
}

/*
 * xor_altivec3 - XOR the p2 and p3 buffers into the p1 buffer, in place.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first source operand
 * @p2:    second source operand
 * @p3:    third source operand
 *
 * Selects the three-operand PREF/CALC/INC variants; the loop itself is
 * supplied by BODY.
 */
#undef PREF
#undef CALC
#undef INC
#define INC INC3
#define CALC(x) CALC3(x)
#define PREF(x) PREF3(x)
static void xor_altivec3(unsigned long bytes, unative_t *p1, unative_t *p2,
			 unative_t *p3)
{
	BODY
}


/*
 * xor_altivec4 - XOR the p2, p3 and p4 buffers into the p1 buffer, in place.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first source operand
 * @p2:    second source operand
 * @p3:    third source operand
 * @p4:    fourth source operand
 *
 * Selects the four-operand PREF/CALC/INC variants; the loop itself is
 * supplied by BODY.
 */
#undef PREF
#undef CALC
#undef INC
#define INC INC4
#define CALC(x) CALC4(x)
#define PREF(x) PREF4(x)
static void xor_altivec4(unsigned long bytes, unative_t *p1, unative_t *p2,
			 unative_t *p3, unative_t *p4)
{
	BODY
}

/*
 * xor_altivec5 - XOR the p2..p5 buffers into the p1 buffer, in place.
 * @bytes: length of each buffer in bytes
 * @p1:    destination, also the first source operand
 * @p2:    second source operand
 * @p3:    third source operand
 * @p4:    fourth source operand
 * @p5:    fifth source operand
 *
 * Selects the five-operand PREF/CALC/INC variants; the loop itself is
 * supplied by BODY.
 */
#undef PREF
#undef CALC
#undef INC
#define INC INC5
#define CALC(x) CALC5(x)
#define PREF(x) PREF5(x)
static void xor_altivec5(unsigned long bytes, unative_t *p1, unative_t *p2,
			 unative_t *p3, unative_t *p4, unative_t *p5)
{
	BODY
}

/*
 * Kernel-only glue.
 * NOTE(review): kernel sources are normally guarded by __KERNEL__ rather than
 * KERNEL - confirm which symbol the build defines, otherwise this section
 * (and the kernel includes above) is never compiled in.
 */
#ifdef KERNEL
/*
 * Template that exposes the four AltiVec routines under the name "altivec".
 * struct xor_block_template is declared outside this file.
 * NOTE(review): the routines take unative_t * arguments; verify that this is
 * compatible with the pointer types of the do_2..do_5 members.
 */
static struct xor_block_template xor_block_altivec = {
.name = "altivec",
.do_2 = xor_altivec2,
.do_3 = xor_altivec3,
.do_4 = xor_altivec4,
.do_5 = xor_altivec5,
};

/*
 * Replace any earlier XOR_TRY_TEMPLATES definition with one that passes the
 * four generic templates and then the AltiVec template to xor_speed().
 * xor_speed() and the xor_block_*regs* templates are defined elsewhere
 * (presumably by asm-generic/xor.h and the RAID core - confirm).
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
xor_speed(&xor_block_8regs); \
xor_speed(&xor_block_8regs_p); \
xor_speed(&xor_block_32regs); \
xor_speed(&xor_block_32regs_p); \
xor_speed(&xor_block_altivec); \
} while (0)
#endif

#endif