Re: [RFC PATCH 7/8] crypto: x86/aes-kl - Support AES algorithm using Key Locker instructions

From: Peter Zijlstra
Date: Fri Dec 18 2020 - 05:12:43 EST


On Wed, Dec 16, 2020 at 09:41:45AM -0800, Chang S. Bae wrote:
> diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
> index bd7f02480ca1..b719a11a2905 100644
> --- a/arch/x86/include/asm/inst.h
> +++ b/arch/x86/include/asm/inst.h
> @@ -122,9 +122,62 @@
> #endif
> .endm
>
> + .macro XMM_NUM opd xmm /* set symbol \opd to N when \xmm is spelled %xmmN (N = 0..15), else REG_NUM_INVALID */
> + \opd = REG_NUM_INVALID /* default; survives only if no .ifc below matches */
> + .ifc \xmm,%xmm0
> + \opd = 0 /* each .ifc compares the operand text, not a register value */
> + .endif
> + .ifc \xmm,%xmm1
> + \opd = 1
> + .endif
> + .ifc \xmm,%xmm2
> + \opd = 2
> + .endif
> + .ifc \xmm,%xmm3
> + \opd = 3
> + .endif
> + .ifc \xmm,%xmm4
> + \opd = 4
> + .endif
> + .ifc \xmm,%xmm5
> + \opd = 5
> + .endif
> + .ifc \xmm,%xmm6
> + \opd = 6
> + .endif
> + .ifc \xmm,%xmm7
> + \opd = 7
> + .endif
> + .ifc \xmm,%xmm8
> + \opd = 8 /* 8..15 have bit 3 set; PFX_REX tests that bit, but it is up to the caller to invoke it */
> + .endif
> + .ifc \xmm,%xmm9
> + \opd = 9
> + .endif
> + .ifc \xmm,%xmm10
> + \opd = 10
> + .endif
> + .ifc \xmm,%xmm11
> + \opd = 11
> + .endif
> + .ifc \xmm,%xmm12
> + \opd = 12
> + .endif
> + .ifc \xmm,%xmm13
> + \opd = 13
> + .endif
> + .ifc \xmm,%xmm14
> + \opd = 14
> + .endif
> + .ifc \xmm,%xmm15
> + \opd = 15
> + .endif
> + .endm
> +
> .macro REG_TYPE type reg
> R32_NUM reg_type_r32 \reg
> R64_NUM reg_type_r64 \reg
> + XMM_NUM reg_type_xmm \reg
> .if reg_type_r64 <> REG_NUM_INVALID
> \type = REG_TYPE_R64
> .elseif reg_type_r32 <> REG_NUM_INVALID
> @@ -134,6 +187,14 @@
> .endif
> .endm
>
> + .macro PFX_OPD_SIZE /* emit one 0x66 prefix byte at the current location; takes no arguments */
> + .byte 0x66 /* x86 operand-size override prefix */
> + .endm
> +
> + .macro PFX_RPT /* emit one 0xf3 prefix byte at the current location; takes no arguments */
> + .byte 0xf3 /* x86 REP prefix value; the Key Locker macros in this file emit it ahead of their 0x0f 0x38 opcode bytes */
> + .endm
> +
> .macro PFX_REX opd1 opd2 W=0
> .if ((\opd1 | \opd2) & 8) || \W
> .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
> @@ -158,6 +219,146 @@
> .byte 0x0f, 0xc7
> MODRM 0xc0 rdpid_opd 0x7
> .endm
> +
> + .macro ENCODEKEY128 reg1 reg2 /* hand-encode ENCODEKEY128 as f3 0f 38 fa + ModRM; \reg1 and \reg2 are 32-bit register names */
> + R32_NUM encodekey128_opd1 \reg1 /* presumably the register number of \reg1; R32_NUM is defined outside this hunk */
> + R32_NUM encodekey128_opd2 \reg2
> + PFX_RPT /* 0xf3 prefix byte */
> + .byte 0x0f, 0x38, 0xfa /* opcode bytes; 0x0f spelled like the sibling macros */
> + MODRM 0xc0 encodekey128_opd1 encodekey128_opd2 /* mod=0xc0; operands now passed in the same \reg1, \reg2 order as ENCODEKEY256 and LOADIWKEY */
> + .endm
> +
> + .macro ENCODEKEY256 reg1 reg2 /* hand-encode ENCODEKEY256 as f3 0f 38 fb + ModRM; \reg1 and \reg2 are 32-bit register names */
> + R32_NUM encodekey256_opd1 \reg1 /* presumably the register number of \reg1; R32_NUM is defined outside this hunk */
> + R32_NUM encodekey256_opd2 \reg2
> + PFX_RPT /* 0xf3 prefix byte */
> + .byte 0x0f, 0x38, 0xfb /* opcode bytes */
> + MODRM 0xc0 encodekey256_opd1 encodekey256_opd2 /* mod=0xc0; MODRM is defined outside this hunk -- presumably packs mod | r/m | reg<<3, TODO confirm */
> + .endm
> +
> + .macro AESENC128KL reg, xmm /* hand-encode AESENC128KL as f3 0f 38 dc + ModRM with mod=0 */
> + REG_TYPE aesenc128kl_opd1_type \reg /* classify \reg as REG_TYPE_R64 or REG_TYPE_R32 */
> + .if aesenc128kl_opd1_type == REG_TYPE_R64
> + R64_NUM aesenc128kl_opd1 \reg
> + .elseif aesenc128kl_opd1_type == REG_TYPE_R32
> + R32_NUM aesenc128kl_opd1 \reg
> + .else
> + aesenc128kl_opd1 = REG_NUM_INVALID /* NOTE(review): no diagnostic is raised here for an unrecognised \reg */
> + .endif
> + XMM_NUM aesenc128kl_opd2 \xmm /* 0..15 for %xmm0..%xmm15, otherwise REG_NUM_INVALID */
> + PFX_RPT /* 0xf3 prefix byte */
> + .byte 0x0f, 0x38, 0xdc /* opcode bytes */
> + MODRM 0x0 aesenc128kl_opd1 aesenc128kl_opd2 /* mod=0; NOTE(review): no REX, SIB or 0x67 byte is emitted -- confirm which \reg and \xmm values encode correctly */
> + .endm
> +
> + .macro AESDEC128KL reg, xmm /* hand-encode AESDEC128KL as f3 0f 38 dd + ModRM with mod=0 */
> + REG_TYPE aesdec128kl_opd1_type \reg /* classify \reg as REG_TYPE_R64 or REG_TYPE_R32 */
> + .if aesdec128kl_opd1_type == REG_TYPE_R64
> + R64_NUM aesdec128kl_opd1 \reg
> + .elseif aesdec128kl_opd1_type == REG_TYPE_R32
> + R32_NUM aesdec128kl_opd1 \reg
> + .else
> + aesdec128kl_opd1 = REG_NUM_INVALID /* NOTE(review): no diagnostic is raised here for an unrecognised \reg */
> + .endif
> + XMM_NUM aesdec128kl_opd2 \xmm /* 0..15 for %xmm0..%xmm15, otherwise REG_NUM_INVALID */
> + PFX_RPT /* 0xf3 prefix byte */
> + .byte 0x0f, 0x38, 0xdd /* opcode bytes */
> + MODRM 0x0 aesdec128kl_opd1 aesdec128kl_opd2 /* mod=0; NOTE(review): no REX, SIB or 0x67 byte is emitted -- confirm which \reg and \xmm values encode correctly */
> + .endm
> +
> + .macro AESENC256KL reg, xmm /* hand-encode AESENC256KL as f3 0f 38 de + ModRM with mod=0 */
> + REG_TYPE aesenc256kl_opd1_type \reg /* classify \reg as REG_TYPE_R64 or REG_TYPE_R32 */
> + .if aesenc256kl_opd1_type == REG_TYPE_R64
> + R64_NUM aesenc256kl_opd1 \reg
> + .elseif aesenc256kl_opd1_type == REG_TYPE_R32
> + R32_NUM aesenc256kl_opd1 \reg
> + .else
> + aesenc256kl_opd1 = REG_NUM_INVALID /* NOTE(review): no diagnostic is raised here for an unrecognised \reg */
> + .endif
> + XMM_NUM aesenc256kl_opd2 \xmm /* 0..15 for %xmm0..%xmm15, otherwise REG_NUM_INVALID */
> + PFX_RPT /* 0xf3 prefix byte */
> + .byte 0x0f, 0x38, 0xde /* opcode bytes */
> + MODRM 0x0 aesenc256kl_opd1 aesenc256kl_opd2 /* mod=0; NOTE(review): no REX, SIB or 0x67 byte is emitted -- confirm which \reg and \xmm values encode correctly */
> + .endm
> +
> + .macro AESDEC256KL reg, xmm /* hand-encode AESDEC256KL as f3 0f 38 df + ModRM with mod=0 */
> + REG_TYPE aesdec256kl_opd1_type \reg /* classify \reg as REG_TYPE_R64 or REG_TYPE_R32 */
> + .if aesdec256kl_opd1_type == REG_TYPE_R64
> + R64_NUM aesdec256kl_opd1 \reg
> + .elseif aesdec256kl_opd1_type == REG_TYPE_R32
> + R32_NUM aesdec256kl_opd1 \reg
> + .else
> + aesdec256kl_opd1 = REG_NUM_INVALID /* NOTE(review): no diagnostic is raised here for an unrecognised \reg */
> + .endif
> + XMM_NUM aesdec256kl_opd2 \xmm /* 0..15 for %xmm0..%xmm15, otherwise REG_NUM_INVALID */
> + PFX_RPT /* 0xf3 prefix byte */
> + .byte 0x0f, 0x38, 0xdf /* opcode bytes */
> + MODRM 0x0 aesdec256kl_opd1 aesdec256kl_opd2 /* mod=0; NOTE(review): no REX, SIB or 0x67 byte is emitted -- confirm which \reg and \xmm values encode correctly */
> + .endm
> +
> + .macro AESENCWIDE128KL reg /* hand-encode AESENCWIDE128KL as f3 0f 38 d8 + ModRM with mod=0 and a constant 0x0 */
> + REG_TYPE aesencwide128kl_opd1_type \reg /* classify \reg as REG_TYPE_R64 or REG_TYPE_R32 */
> + .if aesencwide128kl_opd1_type == REG_TYPE_R64
> + R64_NUM aesencwide128kl_opd1 \reg
> + .elseif aesencwide128kl_opd1_type == REG_TYPE_R32
> + R32_NUM aesencwide128kl_opd1 \reg
> + .else
> + aesencwide128kl_opd1 = REG_NUM_INVALID /* NOTE(review): no diagnostic is raised here for an unrecognised \reg */
> + .endif
> + PFX_RPT /* 0xf3 prefix byte */
> + .byte 0x0f, 0x38, 0xd8 /* opcode bytes shared by all four *WIDE*KL macros */
> + MODRM 0x0 aesencwide128kl_opd1 0x0 /* third argument is the constant that selects this variant; NOTE(review): no REX, SIB or 0x67 byte is emitted -- confirm valid \reg values */
> + .endm
> +
> + .macro AESDECWIDE128KL reg /* hand-encode AESDECWIDE128KL as f3 0f 38 d8 + ModRM with mod=0 and a constant 0x1 */
> + REG_TYPE aesdecwide128kl_opd1_type \reg /* classify \reg as REG_TYPE_R64 or REG_TYPE_R32 */
> + .if aesdecwide128kl_opd1_type == REG_TYPE_R64
> + R64_NUM aesdecwide128kl_opd1 \reg
> + .elseif aesdecwide128kl_opd1_type == REG_TYPE_R32
> + R32_NUM aesdecwide128kl_opd1 \reg
> + .else
> + aesdecwide128kl_opd1 = REG_NUM_INVALID /* NOTE(review): no diagnostic is raised here for an unrecognised \reg */
> + .endif
> + PFX_RPT /* 0xf3 prefix byte */
> + .byte 0x0f, 0x38, 0xd8 /* opcode bytes shared by all four *WIDE*KL macros */
> + MODRM 0x0 aesdecwide128kl_opd1 0x1 /* third argument is the constant that selects this variant; NOTE(review): no REX, SIB or 0x67 byte is emitted -- confirm valid \reg values */
> + .endm
> +
> + .macro AESENCWIDE256KL reg /* hand-encode AESENCWIDE256KL as f3 0f 38 d8 + ModRM with mod=0 and a constant 0x2 */
> + REG_TYPE aesencwide256kl_opd1_type \reg /* classify \reg as REG_TYPE_R64 or REG_TYPE_R32 */
> + .if aesencwide256kl_opd1_type == REG_TYPE_R64
> + R64_NUM aesencwide256kl_opd1 \reg
> + .elseif aesencwide256kl_opd1_type == REG_TYPE_R32
> + R32_NUM aesencwide256kl_opd1 \reg
> + .else
> + aesencwide256kl_opd1 = REG_NUM_INVALID /* NOTE(review): no diagnostic is raised here for an unrecognised \reg */
> + .endif
> + PFX_RPT /* 0xf3 prefix byte */
> + .byte 0x0f, 0x38, 0xd8 /* opcode bytes shared by all four *WIDE*KL macros */
> + MODRM 0x0 aesencwide256kl_opd1 0x2 /* third argument is the constant that selects this variant; NOTE(review): no REX, SIB or 0x67 byte is emitted -- confirm valid \reg values */
> + .endm
> +
> + .macro AESDECWIDE256KL reg /* hand-encode AESDECWIDE256KL as f3 0f 38 d8 + ModRM with mod=0 and a constant 0x3 */
> + REG_TYPE aesdecwide256kl_opd1_type \reg /* classify \reg as REG_TYPE_R64 or REG_TYPE_R32 */
> + .if aesdecwide256kl_opd1_type == REG_TYPE_R64
> + R64_NUM aesdecwide256kl_opd1 \reg
> + .elseif aesdecwide256kl_opd1_type == REG_TYPE_R32
> + R32_NUM aesdecwide256kl_opd1 \reg
> + .else
> + aesdecwide256kl_opd1 = REG_NUM_INVALID /* NOTE(review): no diagnostic is raised here for an unrecognised \reg */
> + .endif
> + PFX_RPT /* 0xf3 prefix byte */
> + .byte 0x0f, 0x38, 0xd8 /* opcode bytes shared by all four *WIDE*KL macros */
> + MODRM 0x0 aesdecwide256kl_opd1 0x3 /* third argument is the constant that selects this variant; NOTE(review): no REX, SIB or 0x67 byte is emitted -- confirm valid \reg values */
> + .endm
> +
> + .macro LOADIWKEY xmm1, xmm2 /* hand-encode LOADIWKEY as f3 0f 38 dc + ModRM with mod=0xc0 */
> + XMM_NUM loadiwkey_opd1 \xmm1 /* 0..15 for %xmm0..%xmm15, otherwise REG_NUM_INVALID */
> + XMM_NUM loadiwkey_opd2 \xmm2 /* NOTE(review): neither result is checked against REG_NUM_INVALID before encoding */
> + PFX_RPT /* 0xf3 prefix byte */
> + .byte 0x0f, 0x38, 0xdc /* same opcode bytes as AESENC128KL, which passes mod=0x0 instead of 0xc0 */
> + MODRM 0xc0 loadiwkey_opd1 loadiwkey_opd2 /* mod=0xc0; NOTE(review): no REX byte is emitted -- confirm %xmm8..%xmm15 are not expected here */
> + .endm
> #endif
>
> #endif

*groan*, so what actual version of binutils is needed and why is this
driver important enough to build on ancient crud to warrant all this
gunk?