mirror of
https://github.com/vincentmli/bpfire.git
synced 2026-04-09 18:45:54 +02:00
kernel: add support aes-ni support for aes-192 and 256
Signed-off-by: Arne Fitzenreiter <arne_f@ipfire.org>
This commit is contained in:
@@ -201,6 +201,11 @@ endif
|
||||
cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0025-Drivers-hv-vmbus-Support-per-channel-driver-state.patch
|
||||
cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-hyperv_Mark_the_Hyoer-V_TSC_as_unstable.patch
|
||||
|
||||
ifneq "$(KCFG)" "-headers"
|
||||
# fix AES-NI 192 and 256 bits / grsec is needed for this patch version
|
||||
cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux/0100-crypto-aesni-Add-support-for-192-256-bit-keys-to-AES.patch
|
||||
endif
|
||||
|
||||
# fix empty symbol crc's
|
||||
cd $(DIR_APP) && patch -Np1 < $(DIR_SRC)/src/patches/linux-genksyms_fix_typeof_handling.patch
|
||||
|
||||
|
||||
@@ -0,0 +1,689 @@
|
||||
From bcdbd313c0e6fd630a8945fd58dc5383631dc6dd Mon Sep 17 00:00:00 2001
|
||||
From: Timothy McCaffrey <timothy.mccaffrey@unisys.com>
|
||||
Date: Tue, 13 Jan 2015 13:16:43 -0500
|
||||
Subject: [PATCH] crypto: aesni - Add support for 192 & 256 bit keys to AESNI
|
||||
RFC4106
|
||||
|
||||
These patches fix the RFC4106 implementation in the aesni-intel
|
||||
module so it supports 192 & 256 bit keys.
|
||||
|
||||
Since the AVX support that was added to this module also only
|
||||
supports 128 bit keys, and this patch only affects the SSE
|
||||
implementation, changes were also made to use the SSE version
|
||||
if key sizes other than 128 are specified.
|
||||
|
||||
RFC4106 specifies that 192 & 256 bit keys must be supported (section
|
||||
8.4).
|
||||
|
||||
Also, this should fix Strongswan issue 341 where the aesni module
|
||||
needs to be unloaded if 256 bit keys are used:
|
||||
|
||||
http://wiki.strongswan.org/issues/341
|
||||
|
||||
This patch has been tested with Sandy Bridge and Haswell processors.
|
||||
With 128 bit keys and input buffers > 512 bytes a slight performance
|
||||
degradation was noticed (~1%). For input buffers of less than 512
|
||||
bytes there was no performance impact. Compared to 128 bit keys,
|
||||
256 bit key size performance is approx. .5 cycles per byte slower
|
||||
on Sandy Bridge, and .37 cycles per byte slower on Haswell (vs.
|
||||
SSE code).
|
||||
|
||||
This patch has also been tested with StrongSwan IPSec connections
|
||||
where it worked correctly.
|
||||
|
||||
I created this diff from a git clone of crypto-2.6.git.
|
||||
|
||||
Any questions, please feel free to contact me.
|
||||
|
||||
Signed-off-by: Timothy McCaffrey <timothy.mccaffrey@unisys.com>
|
||||
Signed-off-by: Jarod Wilson <jarod@redhat.com>
|
||||
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
|
||||
---
|
||||
arch/x86/crypto/aesni-intel_asm.S | 342 +++++++++++++++++++------------------
|
||||
arch/x86/crypto/aesni-intel_glue.c | 31 +++-
|
||||
2 files changed, 202 insertions(+), 171 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
|
||||
index c92c7d8..f5cdfbf 100644
|
||||
--- a/arch/x86/crypto/aesni-intel_asm.S
|
||||
+++ b/arch/x86/crypto/aesni-intel_asm.S
|
||||
@@ -33,12 +33,23 @@
|
||||
#include <asm/inst.h>
|
||||
#include <asm/alternative-asm.h>
|
||||
|
||||
+/*
|
||||
+ * The following macros are used to move an (un)aligned 16 byte value to/from
|
||||
+ * an XMM register. This can done for either FP or integer values, for FP use
|
||||
+ * movaps (move aligned packed single) or integer use movdqa (move double quad
|
||||
+ * aligned). It doesn't make a performance difference which instruction is used
|
||||
+ * since Nehalem (original Core i7) was released. However, the movaps is a byte
|
||||
+ * shorter, so that is the one we'll use for now. (same for unaligned).
|
||||
+ */
|
||||
+#define MOVADQ movaps
|
||||
+#define MOVUDQ movups
|
||||
+
|
||||
#ifdef __x86_64__
|
||||
+
|
||||
.data
|
||||
.align 16
|
||||
.Lgf128mul_x_ble_mask:
|
||||
.octa 0x00000000000000010000000000000087
|
||||
-
|
||||
POLY: .octa 0xC2000000000000000000000000000001
|
||||
TWOONE: .octa 0x00000001000000000000000000000001
|
||||
|
||||
@@ -90,6 +101,7 @@ enc: .octa 0x2
|
||||
#define arg8 STACK_OFFSET+16(%r14)
|
||||
#define arg9 STACK_OFFSET+24(%r14)
|
||||
#define arg10 STACK_OFFSET+32(%r14)
|
||||
+#define keysize 2*15*16(%arg1)
|
||||
#endif
|
||||
|
||||
|
||||
@@ -214,10 +226,12 @@ enc: .octa 0x2
|
||||
|
||||
.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
|
||||
XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
|
||||
+ MOVADQ SHUF_MASK(%rip), %xmm14
|
||||
mov arg7, %r10 # %r10 = AAD
|
||||
mov arg8, %r15 # %r15 = aadLen
|
||||
mov %r15, %r11
|
||||
pxor %xmm\i, %xmm\i
|
||||
+
|
||||
_get_AAD_loop\num_initial_blocks\operation:
|
||||
movd (%r10), \TMP1
|
||||
pslldq $12, \TMP1
|
||||
@@ -226,6 +240,7 @@ _get_AAD_loop\num_initial_blocks\operation:
|
||||
add $4, %r10
|
||||
sub $4, %r15
|
||||
jne _get_AAD_loop\num_initial_blocks\operation
|
||||
+
|
||||
cmp $16, %r11
|
||||
je _get_AAD_loop2_done\num_initial_blocks\operation
|
||||
mov $16, %r15
|
||||
@@ -234,8 +249,8 @@ _get_AAD_loop2\num_initial_blocks\operation:
|
||||
sub $4, %r15
|
||||
cmp %r11, %r15
|
||||
jne _get_AAD_loop2\num_initial_blocks\operation
|
||||
+
|
||||
_get_AAD_loop2_done\num_initial_blocks\operation:
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
|
||||
|
||||
xor %r11, %r11 # initialise the data pointer offset as zero
|
||||
@@ -244,59 +259,34 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
|
||||
|
||||
mov %arg5, %rax # %rax = *Y0
|
||||
movdqu (%rax), \XMM0 # XMM0 = Y0
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, \XMM0
|
||||
|
||||
.if (\i == 5) || (\i == 6) || (\i == 7)
|
||||
+ MOVADQ ONE(%RIP),\TMP1
|
||||
+ MOVADQ (%arg1),\TMP2
|
||||
.irpc index, \i_seq
|
||||
- paddd ONE(%rip), \XMM0 # INCR Y0
|
||||
+ paddd \TMP1, \XMM0 # INCR Y0
|
||||
movdqa \XMM0, %xmm\index
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
|
||||
-
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- pxor 16*0(%arg1), %xmm\index
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x10(%rdi), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 1
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x20(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
+ pxor \TMP2, %xmm\index
|
||||
.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x30(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x40(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x50(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x60(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x70(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x80(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x90(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
+ lea 0x10(%arg1),%r10
|
||||
+ mov keysize,%eax
|
||||
+ shr $2,%eax # 128->4, 192->6, 256->8
|
||||
+ add $5,%eax # 128->9, 192->11, 256->13
|
||||
+
|
||||
+aes_loop_initial_dec\num_initial_blocks:
|
||||
+ MOVADQ (%r10),\TMP1
|
||||
+.irpc index, \i_seq
|
||||
+ AESENC \TMP1, %xmm\index
|
||||
.endr
|
||||
+ add $16,%r10
|
||||
+ sub $1,%eax
|
||||
+ jnz aes_loop_initial_dec\num_initial_blocks
|
||||
+
|
||||
+ MOVADQ (%r10), \TMP1
|
||||
.irpc index, \i_seq
|
||||
- movaps 0xa0(%arg1), \TMP1
|
||||
- AESENCLAST \TMP1, %xmm\index # Round 10
|
||||
+ AESENCLAST \TMP1, %xmm\index # Last Round
|
||||
.endr
|
||||
.irpc index, \i_seq
|
||||
movdqu (%arg3 , %r11, 1), \TMP1
|
||||
@@ -306,10 +296,8 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
|
||||
add $16, %r11
|
||||
|
||||
movdqa \TMP1, %xmm\index
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, %xmm\index
|
||||
-
|
||||
- # prepare plaintext/ciphertext for GHASH computation
|
||||
+ # prepare plaintext/ciphertext for GHASH computation
|
||||
.endr
|
||||
.endif
|
||||
GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
|
||||
@@ -339,30 +327,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
|
||||
* Precomputations for HashKey parallel with encryption of first 4 blocks.
|
||||
* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
|
||||
*/
|
||||
- paddd ONE(%rip), \XMM0 # INCR Y0
|
||||
- movdqa \XMM0, \XMM1
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
+ MOVADQ ONE(%rip), \TMP1
|
||||
+ paddd \TMP1, \XMM0 # INCR Y0
|
||||
+ MOVADQ \XMM0, \XMM1
|
||||
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
|
||||
|
||||
- paddd ONE(%rip), \XMM0 # INCR Y0
|
||||
- movdqa \XMM0, \XMM2
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
+ paddd \TMP1, \XMM0 # INCR Y0
|
||||
+ MOVADQ \XMM0, \XMM2
|
||||
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
|
||||
|
||||
- paddd ONE(%rip), \XMM0 # INCR Y0
|
||||
- movdqa \XMM0, \XMM3
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
+ paddd \TMP1, \XMM0 # INCR Y0
|
||||
+ MOVADQ \XMM0, \XMM3
|
||||
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
|
||||
|
||||
- paddd ONE(%rip), \XMM0 # INCR Y0
|
||||
- movdqa \XMM0, \XMM4
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
+ paddd \TMP1, \XMM0 # INCR Y0
|
||||
+ MOVADQ \XMM0, \XMM4
|
||||
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
|
||||
|
||||
- pxor 16*0(%arg1), \XMM1
|
||||
- pxor 16*0(%arg1), \XMM2
|
||||
- pxor 16*0(%arg1), \XMM3
|
||||
- pxor 16*0(%arg1), \XMM4
|
||||
+ MOVADQ 0(%arg1),\TMP1
|
||||
+ pxor \TMP1, \XMM1
|
||||
+ pxor \TMP1, \XMM2
|
||||
+ pxor \TMP1, \XMM3
|
||||
+ pxor \TMP1, \XMM4
|
||||
movdqa \TMP3, \TMP5
|
||||
pshufd $78, \TMP3, \TMP1
|
||||
pxor \TMP3, \TMP1
|
||||
@@ -400,7 +386,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
|
||||
pshufd $78, \TMP5, \TMP1
|
||||
pxor \TMP5, \TMP1
|
||||
movdqa \TMP1, HashKey_4_k(%rsp)
|
||||
- movaps 0xa0(%arg1), \TMP2
|
||||
+ lea 0xa0(%arg1),%r10
|
||||
+ mov keysize,%eax
|
||||
+ shr $2,%eax # 128->4, 192->6, 256->8
|
||||
+ sub $4,%eax # 128->0, 192->2, 256->4
|
||||
+ jz aes_loop_pre_dec_done\num_initial_blocks
|
||||
+
|
||||
+aes_loop_pre_dec\num_initial_blocks:
|
||||
+ MOVADQ (%r10),\TMP2
|
||||
+.irpc index, 1234
|
||||
+ AESENC \TMP2, %xmm\index
|
||||
+.endr
|
||||
+ add $16,%r10
|
||||
+ sub $1,%eax
|
||||
+ jnz aes_loop_pre_dec\num_initial_blocks
|
||||
+
|
||||
+aes_loop_pre_dec_done\num_initial_blocks:
|
||||
+ MOVADQ (%r10), \TMP2
|
||||
AESENCLAST \TMP2, \XMM1
|
||||
AESENCLAST \TMP2, \XMM2
|
||||
AESENCLAST \TMP2, \XMM3
|
||||
@@ -422,15 +424,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
|
||||
movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
|
||||
movdqa \TMP1, \XMM4
|
||||
add $64, %r11
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
|
||||
pxor \XMMDst, \XMM1
|
||||
# combine GHASHed value with the corresponding ciphertext
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
|
||||
|
||||
_initial_blocks_done\num_initial_blocks\operation:
|
||||
@@ -452,6 +450,7 @@ _initial_blocks_done\num_initial_blocks\operation:
|
||||
|
||||
.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
|
||||
XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
|
||||
+ MOVADQ SHUF_MASK(%rip), %xmm14
|
||||
mov arg7, %r10 # %r10 = AAD
|
||||
mov arg8, %r15 # %r15 = aadLen
|
||||
mov %r15, %r11
|
||||
@@ -473,7 +472,6 @@ _get_AAD_loop2\num_initial_blocks\operation:
|
||||
cmp %r11, %r15
|
||||
jne _get_AAD_loop2\num_initial_blocks\operation
|
||||
_get_AAD_loop2_done\num_initial_blocks\operation:
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
|
||||
|
||||
xor %r11, %r11 # initialise the data pointer offset as zero
|
||||
@@ -482,59 +480,35 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
|
||||
|
||||
mov %arg5, %rax # %rax = *Y0
|
||||
movdqu (%rax), \XMM0 # XMM0 = Y0
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, \XMM0
|
||||
|
||||
.if (\i == 5) || (\i == 6) || (\i == 7)
|
||||
-.irpc index, \i_seq
|
||||
- paddd ONE(%rip), \XMM0 # INCR Y0
|
||||
- movdqa \XMM0, %xmm\index
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
- PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
|
||||
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- pxor 16*0(%arg1), %xmm\index
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x10(%rdi), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 1
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x20(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
-.endr
|
||||
+ MOVADQ ONE(%RIP),\TMP1
|
||||
+ MOVADQ 0(%arg1),\TMP2
|
||||
.irpc index, \i_seq
|
||||
- movaps 0x30(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
+ paddd \TMP1, \XMM0 # INCR Y0
|
||||
+ MOVADQ \XMM0, %xmm\index
|
||||
+ PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
|
||||
+ pxor \TMP2, %xmm\index
|
||||
.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x40(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x50(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x60(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x70(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x80(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
-.endr
|
||||
-.irpc index, \i_seq
|
||||
- movaps 0x90(%arg1), \TMP1
|
||||
- AESENC \TMP1, %xmm\index # Round 2
|
||||
+ lea 0x10(%arg1),%r10
|
||||
+ mov keysize,%eax
|
||||
+ shr $2,%eax # 128->4, 192->6, 256->8
|
||||
+ add $5,%eax # 128->9, 192->11, 256->13
|
||||
+
|
||||
+aes_loop_initial_enc\num_initial_blocks:
|
||||
+ MOVADQ (%r10),\TMP1
|
||||
+.irpc index, \i_seq
|
||||
+ AESENC \TMP1, %xmm\index
|
||||
.endr
|
||||
+ add $16,%r10
|
||||
+ sub $1,%eax
|
||||
+ jnz aes_loop_initial_enc\num_initial_blocks
|
||||
+
|
||||
+ MOVADQ (%r10), \TMP1
|
||||
.irpc index, \i_seq
|
||||
- movaps 0xa0(%arg1), \TMP1
|
||||
- AESENCLAST \TMP1, %xmm\index # Round 10
|
||||
+ AESENCLAST \TMP1, %xmm\index # Last Round
|
||||
.endr
|
||||
.irpc index, \i_seq
|
||||
movdqu (%arg3 , %r11, 1), \TMP1
|
||||
@@ -542,8 +516,6 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
|
||||
movdqu %xmm\index, (%arg2 , %r11, 1)
|
||||
# write back plaintext/ciphertext for num_initial_blocks
|
||||
add $16, %r11
|
||||
-
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, %xmm\index
|
||||
|
||||
# prepare plaintext/ciphertext for GHASH computation
|
||||
@@ -576,30 +548,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
|
||||
* Precomputations for HashKey parallel with encryption of first 4 blocks.
|
||||
* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
|
||||
*/
|
||||
- paddd ONE(%rip), \XMM0 # INCR Y0
|
||||
- movdqa \XMM0, \XMM1
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
+ MOVADQ ONE(%RIP),\TMP1
|
||||
+ paddd \TMP1, \XMM0 # INCR Y0
|
||||
+ MOVADQ \XMM0, \XMM1
|
||||
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
|
||||
|
||||
- paddd ONE(%rip), \XMM0 # INCR Y0
|
||||
- movdqa \XMM0, \XMM2
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
+ paddd \TMP1, \XMM0 # INCR Y0
|
||||
+ MOVADQ \XMM0, \XMM2
|
||||
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
|
||||
|
||||
- paddd ONE(%rip), \XMM0 # INCR Y0
|
||||
- movdqa \XMM0, \XMM3
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
+ paddd \TMP1, \XMM0 # INCR Y0
|
||||
+ MOVADQ \XMM0, \XMM3
|
||||
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
|
||||
|
||||
- paddd ONE(%rip), \XMM0 # INCR Y0
|
||||
- movdqa \XMM0, \XMM4
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
+ paddd \TMP1, \XMM0 # INCR Y0
|
||||
+ MOVADQ \XMM0, \XMM4
|
||||
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
|
||||
|
||||
- pxor 16*0(%arg1), \XMM1
|
||||
- pxor 16*0(%arg1), \XMM2
|
||||
- pxor 16*0(%arg1), \XMM3
|
||||
- pxor 16*0(%arg1), \XMM4
|
||||
+ MOVADQ 0(%arg1),\TMP1
|
||||
+ pxor \TMP1, \XMM1
|
||||
+ pxor \TMP1, \XMM2
|
||||
+ pxor \TMP1, \XMM3
|
||||
+ pxor \TMP1, \XMM4
|
||||
movdqa \TMP3, \TMP5
|
||||
pshufd $78, \TMP3, \TMP1
|
||||
pxor \TMP3, \TMP1
|
||||
@@ -637,7 +607,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
|
||||
pshufd $78, \TMP5, \TMP1
|
||||
pxor \TMP5, \TMP1
|
||||
movdqa \TMP1, HashKey_4_k(%rsp)
|
||||
- movaps 0xa0(%arg1), \TMP2
|
||||
+ lea 0xa0(%arg1),%r10
|
||||
+ mov keysize,%eax
|
||||
+ shr $2,%eax # 128->4, 192->6, 256->8
|
||||
+ sub $4,%eax # 128->0, 192->2, 256->4
|
||||
+ jz aes_loop_pre_enc_done\num_initial_blocks
|
||||
+
|
||||
+aes_loop_pre_enc\num_initial_blocks:
|
||||
+ MOVADQ (%r10),\TMP2
|
||||
+.irpc index, 1234
|
||||
+ AESENC \TMP2, %xmm\index
|
||||
+.endr
|
||||
+ add $16,%r10
|
||||
+ sub $1,%eax
|
||||
+ jnz aes_loop_pre_enc\num_initial_blocks
|
||||
+
|
||||
+aes_loop_pre_enc_done\num_initial_blocks:
|
||||
+ MOVADQ (%r10), \TMP2
|
||||
AESENCLAST \TMP2, \XMM1
|
||||
AESENCLAST \TMP2, \XMM2
|
||||
AESENCLAST \TMP2, \XMM3
|
||||
@@ -656,15 +642,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
|
||||
movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
|
||||
|
||||
add $64, %r11
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
|
||||
pxor \XMMDst, \XMM1
|
||||
# combine GHASHed value with the corresponding ciphertext
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
|
||||
- movdqa SHUF_MASK(%rip), %xmm14
|
||||
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
|
||||
|
||||
_initial_blocks_done\num_initial_blocks\operation:
|
||||
@@ -795,7 +777,23 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
||||
AESENC \TMP3, \XMM3
|
||||
AESENC \TMP3, \XMM4
|
||||
PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
|
||||
- movaps 0xa0(%arg1), \TMP3
|
||||
+ lea 0xa0(%arg1),%r10
|
||||
+ mov keysize,%eax
|
||||
+ shr $2,%eax # 128->4, 192->6, 256->8
|
||||
+ sub $4,%eax # 128->0, 192->2, 256->4
|
||||
+ jz aes_loop_par_enc_done
|
||||
+
|
||||
+aes_loop_par_enc:
|
||||
+ MOVADQ (%r10),\TMP3
|
||||
+.irpc index, 1234
|
||||
+ AESENC \TMP3, %xmm\index
|
||||
+.endr
|
||||
+ add $16,%r10
|
||||
+ sub $1,%eax
|
||||
+ jnz aes_loop_par_enc
|
||||
+
|
||||
+aes_loop_par_enc_done:
|
||||
+ MOVADQ (%r10), \TMP3
|
||||
AESENCLAST \TMP3, \XMM1 # Round 10
|
||||
AESENCLAST \TMP3, \XMM2
|
||||
AESENCLAST \TMP3, \XMM3
|
||||
@@ -987,8 +985,24 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
|
||||
AESENC \TMP3, \XMM3
|
||||
AESENC \TMP3, \XMM4
|
||||
PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
|
||||
- movaps 0xa0(%arg1), \TMP3
|
||||
- AESENCLAST \TMP3, \XMM1 # Round 10
|
||||
+ lea 0xa0(%arg1),%r10
|
||||
+ mov keysize,%eax
|
||||
+ shr $2,%eax # 128->4, 192->6, 256->8
|
||||
+ sub $4,%eax # 128->0, 192->2, 256->4
|
||||
+ jz aes_loop_par_dec_done
|
||||
+
|
||||
+aes_loop_par_dec:
|
||||
+ MOVADQ (%r10),\TMP3
|
||||
+.irpc index, 1234
|
||||
+ AESENC \TMP3, %xmm\index
|
||||
+.endr
|
||||
+ add $16,%r10
|
||||
+ sub $1,%eax
|
||||
+ jnz aes_loop_par_dec
|
||||
+
|
||||
+aes_loop_par_dec_done:
|
||||
+ MOVADQ (%r10), \TMP3
|
||||
+ AESENCLAST \TMP3, \XMM1 # last round
|
||||
AESENCLAST \TMP3, \XMM2
|
||||
AESENCLAST \TMP3, \XMM3
|
||||
AESENCLAST \TMP3, \XMM4
|
||||
@@ -1156,33 +1170,29 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
|
||||
pxor \TMP6, \XMMDst # reduced result is in XMMDst
|
||||
.endm
|
||||
|
||||
-/* Encryption of a single block done*/
|
||||
-.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
|
||||
|
||||
- pxor (%arg1), \XMM0
|
||||
- movaps 16(%arg1), \TMP1
|
||||
- AESENC \TMP1, \XMM0
|
||||
- movaps 32(%arg1), \TMP1
|
||||
- AESENC \TMP1, \XMM0
|
||||
- movaps 48(%arg1), \TMP1
|
||||
- AESENC \TMP1, \XMM0
|
||||
- movaps 64(%arg1), \TMP1
|
||||
- AESENC \TMP1, \XMM0
|
||||
- movaps 80(%arg1), \TMP1
|
||||
- AESENC \TMP1, \XMM0
|
||||
- movaps 96(%arg1), \TMP1
|
||||
- AESENC \TMP1, \XMM0
|
||||
- movaps 112(%arg1), \TMP1
|
||||
- AESENC \TMP1, \XMM0
|
||||
- movaps 128(%arg1), \TMP1
|
||||
- AESENC \TMP1, \XMM0
|
||||
- movaps 144(%arg1), \TMP1
|
||||
- AESENC \TMP1, \XMM0
|
||||
- movaps 160(%arg1), \TMP1
|
||||
- AESENCLAST \TMP1, \XMM0
|
||||
-.endm
|
||||
+/* Encryption of a single block
|
||||
+* uses eax & r10
|
||||
+*/
|
||||
|
||||
+.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
|
||||
|
||||
+ pxor (%arg1), \XMM0
|
||||
+ mov keysize,%eax
|
||||
+ shr $2,%eax # 128->4, 192->6, 256->8
|
||||
+ add $5,%eax # 128->9, 192->11, 256->13
|
||||
+ lea 16(%arg1), %r10 # get first expanded key address
|
||||
+
|
||||
+_esb_loop_\@:
|
||||
+ MOVADQ (%r10),\TMP1
|
||||
+ AESENC \TMP1,\XMM0
|
||||
+ add $16,%r10
|
||||
+ sub $1,%eax
|
||||
+ jnz _esb_loop_\@
|
||||
+
|
||||
+ MOVADQ (%r10),\TMP1
|
||||
+ AESENCLAST \TMP1,\XMM0
|
||||
+.endm
|
||||
/*****************************************************************************
|
||||
* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
|
||||
* u8 *out, // Plaintext output. Encrypt in-place is allowed.
|
||||
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
|
||||
index 6d4faba..bfaf817 100644
|
||||
--- a/arch/x86/crypto/aesni-intel_glue.c
|
||||
+++ b/arch/x86/crypto/aesni-intel_glue.c
|
||||
@@ -177,7 +177,8 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out,
|
||||
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len)
|
||||
{
|
||||
- if (plaintext_len < AVX_GEN2_OPTSIZE) {
|
||||
+ struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
|
||||
+ if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){
|
||||
aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
|
||||
aad_len, auth_tag, auth_tag_len);
|
||||
} else {
|
||||
@@ -192,7 +193,8 @@ static void aesni_gcm_dec_avx(void *ctx, u8 *out,
|
||||
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len)
|
||||
{
|
||||
- if (ciphertext_len < AVX_GEN2_OPTSIZE) {
|
||||
+ struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
|
||||
+ if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
|
||||
aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad,
|
||||
aad_len, auth_tag, auth_tag_len);
|
||||
} else {
|
||||
@@ -226,7 +228,8 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
|
||||
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len)
|
||||
{
|
||||
- if (plaintext_len < AVX_GEN2_OPTSIZE) {
|
||||
+ struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
|
||||
+ if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
|
||||
aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
|
||||
aad_len, auth_tag, auth_tag_len);
|
||||
} else if (plaintext_len < AVX_GEN4_OPTSIZE) {
|
||||
@@ -245,7 +248,8 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
|
||||
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
|
||||
u8 *auth_tag, unsigned long auth_tag_len)
|
||||
{
|
||||
- if (ciphertext_len < AVX_GEN2_OPTSIZE) {
|
||||
+ struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
|
||||
+ if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
|
||||
aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey,
|
||||
aad, aad_len, auth_tag, auth_tag_len);
|
||||
} else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
|
||||
@@ -878,7 +882,8 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
|
||||
}
|
||||
/*Account for 4 byte nonce at the end.*/
|
||||
key_len -= 4;
|
||||
- if (key_len != AES_KEYSIZE_128) {
|
||||
+ if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
|
||||
+ key_len != AES_KEYSIZE_256) {
|
||||
crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -989,6 +994,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
|
||||
__be32 counter = cpu_to_be32(1);
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
|
||||
+ u32 key_len = ctx->aes_key_expanded.key_length;
|
||||
void *aes_ctx = &(ctx->aes_key_expanded);
|
||||
unsigned long auth_tag_len = crypto_aead_authsize(tfm);
|
||||
u8 iv_tab[16+AESNI_ALIGN];
|
||||
@@ -1003,6 +1009,13 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
|
||||
/* to 8 or 12 bytes */
|
||||
if (unlikely(req->assoclen != 8 && req->assoclen != 12))
|
||||
return -EINVAL;
|
||||
+ if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16))
|
||||
+ return -EINVAL;
|
||||
+ if (unlikely(key_len != AES_KEYSIZE_128 &&
|
||||
+ key_len != AES_KEYSIZE_192 &&
|
||||
+ key_len != AES_KEYSIZE_256))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
/* IV below built */
|
||||
for (i = 0; i < 4; i++)
|
||||
*(iv+i) = ctx->nonce[i];
|
||||
@@ -1067,6 +1080,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
|
||||
int retval = 0;
|
||||
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
|
||||
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
|
||||
+ u32 key_len = ctx->aes_key_expanded.key_length;
|
||||
void *aes_ctx = &(ctx->aes_key_expanded);
|
||||
unsigned long auth_tag_len = crypto_aead_authsize(tfm);
|
||||
u8 iv_and_authTag[32+AESNI_ALIGN];
|
||||
@@ -1080,6 +1094,13 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
|
||||
if (unlikely((req->cryptlen < auth_tag_len) ||
|
||||
(req->assoclen != 8 && req->assoclen != 12)))
|
||||
return -EINVAL;
|
||||
+ if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16))
|
||||
+ return -EINVAL;
|
||||
+ if (unlikely(key_len != AES_KEYSIZE_128 &&
|
||||
+ key_len != AES_KEYSIZE_192 &&
|
||||
+ key_len != AES_KEYSIZE_256))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
/* Assuming we are supporting rfc4106 64-bit extended */
|
||||
/* sequence numbers We need to have the AAD length */
|
||||
/* equal to 8 or 12 bytes */
|
||||
--
|
||||
2.7.4
|
||||
|
||||
Reference in New Issue
Block a user