Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions wolfcrypt/src/aes_xts_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -1444,7 +1444,6 @@ AES_XTS_init_avx1:
.p2align 4
_AES_XTS_init_avx1:
#endif /* __APPLE__ */
- movl %edx, %eax
vmovdqu (%rdi), %xmm0
# aes_enc_block
vpxor (%rsi), %xmm0, %xmm0
Expand All @@ -1466,13 +1465,13 @@ _AES_XTS_init_avx1:
vaesenc %xmm2, %xmm0, %xmm0
vmovdqu 144(%rsi), %xmm2
vaesenc %xmm2, %xmm0, %xmm0
- cmpl $11, %eax
+ cmpl $11, %edx
vmovdqu 160(%rsi), %xmm2
jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last
vaesenc %xmm2, %xmm0, %xmm0
vmovdqu 176(%rsi), %xmm3
vaesenc %xmm3, %xmm0, %xmm0
- cmpl $13, %eax
+ cmpl $13, %edx
vmovdqu 192(%rsi), %xmm2
jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last
vaesenc %xmm2, %xmm0, %xmm0
Expand Down
5 changes: 2 additions & 3 deletions wolfcrypt/src/aes_xts_asm.asm
Original file line number Diff line number Diff line change
Expand Up @@ -1456,7 +1456,6 @@ _text ENDS
IFDEF HAVE_INTEL_AVX1
_text SEGMENT READONLY PARA
AES_XTS_init_avx1 PROC
- mov eax, r8d
vmovdqu xmm0, OWORD PTR [rcx]
; aes_enc_block
vpxor xmm0, xmm0, [rdx]
Expand All @@ -1478,13 +1477,13 @@ AES_XTS_init_avx1 PROC
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm2, OWORD PTR [rdx+144]
vaesenc xmm0, xmm0, xmm2
- cmp eax, 11
+ cmp r8d, 11
vmovdqu xmm2, OWORD PTR [rdx+160]
jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last
vaesenc xmm0, xmm0, xmm2
vmovdqu xmm3, OWORD PTR [rdx+176]
vaesenc xmm0, xmm0, xmm3
- cmp eax, 13
+ cmp r8d, 13
vmovdqu xmm2, OWORD PTR [rdx+192]
jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last
vaesenc xmm0, xmm0, xmm2
Expand Down
2 changes: 1 addition & 1 deletion wolfcrypt/src/fe_x25519_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -12372,7 +12372,7 @@ _fe_cmov_table_avx2:
pushq %r15
pushq %rbx
movq %rdx, %rcx
- xor %rbx, %rbx
+ xorq %rbx, %rbx
movsbq %cl, %rax
cdq
xorb %dl, %al
Expand Down
6 changes: 3 additions & 3 deletions wolfcrypt/src/port/arm/armv8-32-aes-asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -12555,7 +12555,7 @@ L_AES_CBC_encrypt_block_nr_256:
eor r5, r5, r9
eor r6, r6, r10
eor r7, r7, r11
- #endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
+ #endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
pop {r1, r2, lr}
ldr r3, [sp]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
Expand Down Expand Up @@ -13269,7 +13269,7 @@ L_AES_CBC_encrypt_block_nr_192:
eor r5, r5, r9
eor r6, r6, r10
eor r7, r7, r11
- #endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
+ #endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
pop {r1, r2, lr}
ldr r3, [sp]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
Expand Down Expand Up @@ -13983,7 +13983,7 @@ L_AES_CBC_encrypt_block_nr_128:
eor r5, r5, r9
eor r6, r6, r10
eor r7, r7, r11
- #endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
+ #endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
pop {r1, r2, lr}
ldr r3, [sp]
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
Expand Down
6 changes: 3 additions & 3 deletions wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -12930,7 +12930,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in,
"eor r5, r5, r9\n\t"
"eor r6, r6, r10\n\t"
"eor r7, r7, r11\n\t"
- #endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
+ #endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
"pop {r1, %[len], lr}\n\t"
"ldr %[ks], [sp]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
Expand Down Expand Up @@ -13647,7 +13647,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in,
"eor r5, r5, r9\n\t"
"eor r6, r6, r10\n\t"
"eor r7, r7, r11\n\t"
- #endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
+ #endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
"pop {r1, %[len], lr}\n\t"
"ldr %[ks], [sp]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
Expand Down Expand Up @@ -14364,7 +14364,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in,
"eor r5, r5, r9\n\t"
"eor r6, r6, r10\n\t"
"eor r7, r7, r11\n\t"
- #endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
+ #endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
"pop {r1, %[len], lr}\n\t"
"ldr %[ks], [sp]\n\t"
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
Expand Down
28 changes: 14 additions & 14 deletions wolfcrypt/src/port/arm/armv8-32-chacha-asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,14 @@ wc_chacha_setkey:
#endif /* BIG_ENDIAN_ORDER */
stm r0!, {r4, r5, r12, lr}
# Next 16 bytes of key.
- beq L_chacha_arm32_setkey_same_keyb_ytes
+ beq L_chacha_arm32_setkey_same_key_bytes
# Update key pointer for next 16 bytes.
add r1, r1, r2
ldr r4, [r1]
ldr r5, [r1, #4]
ldr r12, [r1, #8]
ldr lr, [r1, #12]
- L_chacha_arm32_setkey_same_keyb_ytes:
+ L_chacha_arm32_setkey_same_key_bytes:
stm r0, {r4, r5, r12, lr}
pop {r4, r5, pc}
.size wc_chacha_setkey,.-wc_chacha_setkey
Expand Down Expand Up @@ -572,11 +572,11 @@ L_chacha_crypt_bytes_arm32_round_start_256:
ror r11, r11, #16
veor q7, q7, q4
add r8, r8, r10
- vrev32.i16 q15, q15
+ vrev32.16 q15, q15
add r9, r9, r11
- vrev32.i16 q3, q3
+ vrev32.16 q3, q3
eor r4, r4, r8
- vrev32.i16 q7, q7
+ vrev32.16 q7, q7
eor r5, r5, r9
# c += d; b ^= c; b <<<= 12;
vadd.i32 q14, q14, q15
Expand Down Expand Up @@ -685,11 +685,11 @@ L_chacha_crypt_bytes_arm32_round_start_256:
ror r10, r10, #16
veor q7, q7, q4
add r8, r8, r11
- vrev32.i16 q15, q15
+ vrev32.16 q15, q15
add r9, r9, r10
- vrev32.i16 q3, q3
+ vrev32.16 q3, q3
eor r5, r5, r8
- vrev32.i16 q7, q7
+ vrev32.16 q7, q7
eor r6, r6, r9
# c += d; b ^= c; b <<<= 12;
vadd.i32 q14, q14, q15
Expand Down Expand Up @@ -891,8 +891,8 @@ L_chacha_crypt_bytes_arm32_round_start_128:
vadd.i32 q4, q4, q5
veor q3, q3, q0
veor q7, q7, q4
- vrev32.i16 q3, q3
- vrev32.i16 q7, q7
+ vrev32.16 q3, q3
+ vrev32.16 q7, q7
# c += d; b ^= c; b <<<= 12;
vadd.i32 q2, q2, q3
vadd.i32 q6, q6, q7
Expand Down Expand Up @@ -932,8 +932,8 @@ L_chacha_crypt_bytes_arm32_round_start_128:
vadd.i32 q4, q4, q5
veor q3, q3, q0
veor q7, q7, q4
- vrev32.i16 q3, q3
- vrev32.i16 q7, q7
+ vrev32.16 q3, q3
+ vrev32.16 q7, q7
# c += d; b ^= c; b <<<= 12;
vadd.i32 q2, q2, q3
vadd.i32 q6, q6, q7
Expand Down Expand Up @@ -1178,13 +1178,13 @@ wc_chacha_setkey:
vldm r3, {q0}
vld1.8 {q1}, [r1]!
#ifdef BIG_ENDIAN_ORDER
- vrev32.i16 q1, q1
+ vrev32.16 q1, q1
#endif /* BIG_ENDIAN_ORDER */
vstm r0!, {q0, q1}
beq L_chacha_setkey_arm32_done
vld1.8 {q1}, [r1]
#ifdef BIG_ENDIAN_ORDER
- vrev32.i16 q1, q1
+ vrev32.16 q1, q1
#endif /* BIG_ENDIAN_ORDER */
L_chacha_setkey_arm32_done:
vstm r0, {q1}
Expand Down
28 changes: 14 additions & 14 deletions wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,15 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key,
#endif /* BIG_ENDIAN_ORDER */
"stm %[x]!, {r4, r5, r12, lr}\n\t"
/* Next 16 bytes of key. */
- "beq L_chacha_arm32_setkey_same_keyb_ytes_%=\n\t"
+ "beq L_chacha_arm32_setkey_same_key_bytes_%=\n\t"
/* Update key pointer for next 16 bytes. */
"add %[key], %[key], %[keySz]\n\t"
"ldr r4, [%[key]]\n\t"
"ldr r5, [%[key], #4]\n\t"
"ldr r12, [%[key], #8]\n\t"
"ldr lr, [%[key], #12]\n\t"
"\n"
- "L_chacha_arm32_setkey_same_keyb_ytes_%=: \n\t"
+ "L_chacha_arm32_setkey_same_key_bytes_%=: \n\t"
"stm %[x], {r4, r5, r12, lr}\n\t"
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
: [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz),
Expand Down Expand Up @@ -693,11 +693,11 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"ror r11, r11, #16\n\t"
"veor q7, q7, q4\n\t"
"add r8, r8, r10\n\t"
"vrev32.i16 q15, q15\n\t"
"vrev32.16 q15, q15\n\t"
"add r9, r9, r11\n\t"
"vrev32.i16 q3, q3\n\t"
"vrev32.16 q3, q3\n\t"
"eor r4, r4, r8\n\t"
"vrev32.i16 q7, q7\n\t"
"vrev32.16 q7, q7\n\t"
"eor r5, r5, r9\n\t"
/* c += d; b ^= c; b <<<= 12; */
"vadd.i32 q14, q14, q15\n\t"
Expand Down Expand Up @@ -806,11 +806,11 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"ror r10, r10, #16\n\t"
"veor q7, q7, q4\n\t"
"add r8, r8, r11\n\t"
"vrev32.i16 q15, q15\n\t"
"vrev32.16 q15, q15\n\t"
"add r9, r9, r10\n\t"
"vrev32.i16 q3, q3\n\t"
"vrev32.16 q3, q3\n\t"
"eor r5, r5, r8\n\t"
"vrev32.i16 q7, q7\n\t"
"vrev32.16 q7, q7\n\t"
"eor r6, r6, r9\n\t"
/* c += d; b ^= c; b <<<= 12; */
"vadd.i32 q14, q14, q15\n\t"
Expand Down Expand Up @@ -1014,8 +1014,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"vadd.i32 q4, q4, q5\n\t"
"veor q3, q3, q0\n\t"
"veor q7, q7, q4\n\t"
"vrev32.i16 q3, q3\n\t"
"vrev32.i16 q7, q7\n\t"
"vrev32.16 q3, q3\n\t"
"vrev32.16 q7, q7\n\t"
/* c += d; b ^= c; b <<<= 12; */
"vadd.i32 q2, q2, q3\n\t"
"vadd.i32 q6, q6, q7\n\t"
Expand Down Expand Up @@ -1055,8 +1055,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
"vadd.i32 q4, q4, q5\n\t"
"veor q3, q3, q0\n\t"
"veor q7, q7, q4\n\t"
"vrev32.i16 q3, q3\n\t"
"vrev32.i16 q7, q7\n\t"
"vrev32.16 q3, q3\n\t"
"vrev32.16 q7, q7\n\t"
/* c += d; b ^= c; b <<<= 12; */
"vadd.i32 q2, q2, q3\n\t"
"vadd.i32 q6, q6, q7\n\t"
Expand Down Expand Up @@ -1329,13 +1329,13 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key,
"vldm r3, {q0}\n\t"
"vld1.8 {q1}, [%[key]]!\n\t"
#ifdef BIG_ENDIAN_ORDER
- "vrev32.i16 q1, q1\n\t"
+ "vrev32.16 q1, q1\n\t"
#endif /* BIG_ENDIAN_ORDER */
"vstm %[x]!, {q0-q1}\n\t"
"beq L_chacha_setkey_arm32_done_%=\n\t"
"vld1.8 {q1}, [%[key]]\n\t"
#ifdef BIG_ENDIAN_ORDER
- "vrev32.i16 q1, q1\n\t"
+ "vrev32.16 q1, q1\n\t"
#endif /* BIG_ENDIAN_ORDER */
"\n"
"L_chacha_setkey_arm32_done_%=: \n\t"
Expand Down
Loading
Loading