Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 15 additions & 27 deletions wolfcrypt/src/port/arm/armv8-aes-asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -44510,37 +44510,32 @@ L_AES_CTR_encrypt_NEON_loop_4:
ld1 {v4.2d}, [x9], #16
mov v8.d[1], x10
mov v8.d[0], x11
rev64 v8.16b, v8.16b
rev32 v8.16b, v8.16b
rev64 v8.4s, v8.4s
# Round: 0 - XOR in key schedule
eor v0.16b, v8.16b, v4.16b
adds x10, x10, #1
adc x11, x11, xzr
mov v8.d[1], x10
mov v8.d[0], x11
rev64 v8.16b, v8.16b
rev32 v8.16b, v8.16b
rev64 v8.4s, v8.4s
eor v1.16b, v8.16b, v4.16b
adds x10, x10, #1
adc x11, x11, xzr
mov v8.d[1], x10
mov v8.d[0], x11
rev64 v8.16b, v8.16b
rev32 v8.16b, v8.16b
rev64 v8.4s, v8.4s
eor v2.16b, v8.16b, v4.16b
adds x10, x10, #1
adc x11, x11, xzr
mov v8.d[1], x10
mov v8.d[0], x11
rev64 v8.16b, v8.16b
rev32 v8.16b, v8.16b
rev64 v8.4s, v8.4s
eor v3.16b, v8.16b, v4.16b
adds x10, x10, #1
adc x11, x11, xzr
mov v8.d[1], x10
mov v8.d[0], x11
rev64 v8.16b, v8.16b
rev32 v8.16b, v8.16b
rev64 v8.4s, v8.4s
sub w8, w4, #2
L_AES_CTR_encrypt_NEON_loop_nr_4:
tbl v4.16b, {v16.16b, v17.16b, v18.16b, v19.16b}, v0.16b
Expand Down Expand Up @@ -44939,8 +44934,7 @@ L_AES_CTR_encrypt_NEON_loop_nr_4:
bge L_AES_CTR_encrypt_NEON_loop_4
mov v2.d[1], x10
mov v2.d[0], x11
rev64 v2.16b, v2.16b
rev32 v2.16b, v2.16b
rev64 v2.4s, v2.4s
L_AES_CTR_encrypt_NEON_start_2:
movi v12.16b, #0x40
movi v13.16b, #0x80
Expand All @@ -44958,15 +44952,13 @@ L_AES_CTR_encrypt_NEON_loop_2:
adc x11, x11, xzr
mov v2.d[1], x10
mov v2.d[0], x11
rev64 v2.16b, v2.16b
rev32 v2.16b, v2.16b
rev64 v2.4s, v2.4s
eor v1.16b, v2.16b, v4.16b
adds x10, x10, #1
adc x11, x11, xzr
mov v2.d[1], x10
mov v2.d[0], x11
rev64 v2.16b, v2.16b
rev32 v2.16b, v2.16b
rev64 v2.4s, v2.4s
sub w8, w4, #2
L_AES_CTR_encrypt_NEON_loop_nr_2:
eor v8.16b, v0.16b, v12.16b
Expand Down Expand Up @@ -45283,8 +45275,7 @@ L_AES_CTR_encrypt_NEON_loop_nr_1:
adc x11, x11, xzr
mov v2.d[1], x10
mov v2.d[0], x11
rev64 v2.16b, v2.16b
rev32 v2.16b, v2.16b
rev64 v2.4s, v2.4s
L_AES_CTR_encrypt_NEON_data_done:
rev32 v2.16b, v2.16b
st1 {v2.2d}, [x5]
Expand Down Expand Up @@ -49782,8 +49773,7 @@ _AES_XTS_decrypt_NEON:
mov x17, #0x87
ands w19, w2, #15
cset w16, ne
lsl w16, w16, #4
sub w2, w2, w16
sub w2, w2, w16, lsl 4
ld1 {v2.2d}, [x3]
ld1 {v4.2d}, [x5]
rev32 v2.16b, v2.16b
Expand Down Expand Up @@ -51681,11 +51671,10 @@ L_AES_set_encrypt_key_loop_256:
stp w6, w7, [x2]
stnp w8, w9, [x2, #8]
sub x2, x2, #16
mov w3, w9
ubfx w6, w3, #8, #8
ubfx w7, w3, #16, #8
ubfx w8, w3, #24, #8
ubfx w3, w3, #0, #8
ubfx w6, w9, #8, #8
ubfx w7, w9, #16, #8
ubfx w8, w9, #24, #8
ubfx w3, w9, #0, #8
lsl w6, w6, #2
lsl w7, w7, #2
lsl w8, w8, #2
Expand Down Expand Up @@ -55626,8 +55615,7 @@ _AES_XTS_decrypt:
#endif /* __APPLE__ */
ands w11, w2, #15
cset w11, ne
lsl w11, w11, #4
sub w2, w2, w11
sub w2, w2, w11, lsl 4
mov x11, #0x87
mov x28, x5
ldp x23, x24, [x3]
Expand Down
42 changes: 15 additions & 27 deletions wolfcrypt/src/port/arm/armv8-aes-asm_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -44842,37 +44842,32 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out,
"ld1 {v4.2d}, [x9], #16\n\t"
"mov v8.d[1], x10\n\t"
"mov v8.d[0], x11\n\t"
"rev64 v8.16b, v8.16b\n\t"
"rev32 v8.16b, v8.16b\n\t"
"rev64 v8.4s, v8.4s\n\t"
/* Round: 0 - XOR in key schedule */
"eor v0.16b, v8.16b, v4.16b\n\t"
"adds x10, x10, #1\n\t"
"adc x11, x11, xzr\n\t"
"mov v8.d[1], x10\n\t"
"mov v8.d[0], x11\n\t"
"rev64 v8.16b, v8.16b\n\t"
"rev32 v8.16b, v8.16b\n\t"
"rev64 v8.4s, v8.4s\n\t"
"eor v1.16b, v8.16b, v4.16b\n\t"
"adds x10, x10, #1\n\t"
"adc x11, x11, xzr\n\t"
"mov v8.d[1], x10\n\t"
"mov v8.d[0], x11\n\t"
"rev64 v8.16b, v8.16b\n\t"
"rev32 v8.16b, v8.16b\n\t"
"rev64 v8.4s, v8.4s\n\t"
"eor v2.16b, v8.16b, v4.16b\n\t"
"adds x10, x10, #1\n\t"
"adc x11, x11, xzr\n\t"
"mov v8.d[1], x10\n\t"
"mov v8.d[0], x11\n\t"
"rev64 v8.16b, v8.16b\n\t"
"rev32 v8.16b, v8.16b\n\t"
"rev64 v8.4s, v8.4s\n\t"
"eor v3.16b, v8.16b, v4.16b\n\t"
"adds x10, x10, #1\n\t"
"adc x11, x11, xzr\n\t"
"mov v8.d[1], x10\n\t"
"mov v8.d[0], x11\n\t"
"rev64 v8.16b, v8.16b\n\t"
"rev32 v8.16b, v8.16b\n\t"
"rev64 v8.4s, v8.4s\n\t"
"sub w8, %w[nr], #2\n\t"
"\n"
"L_AES_CTR_encrypt_NEON_loop_nr_4_%=:\n\t"
Expand Down Expand Up @@ -45272,8 +45267,7 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out,
"b.ge L_AES_CTR_encrypt_NEON_loop_4_%=\n\t"
"mov v2.d[1], x10\n\t"
"mov v2.d[0], x11\n\t"
"rev64 v2.16b, v2.16b\n\t"
"rev32 v2.16b, v2.16b\n\t"
"rev64 v2.4s, v2.4s\n\t"
"\n"
"L_AES_CTR_encrypt_NEON_start_2_%=:\n\t"
"movi v12.16b, #0x40\n\t"
Expand All @@ -45293,15 +45287,13 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out,
"adc x11, x11, xzr\n\t"
"mov v2.d[1], x10\n\t"
"mov v2.d[0], x11\n\t"
"rev64 v2.16b, v2.16b\n\t"
"rev32 v2.16b, v2.16b\n\t"
"rev64 v2.4s, v2.4s\n\t"
"eor v1.16b, v2.16b, v4.16b\n\t"
"adds x10, x10, #1\n\t"
"adc x11, x11, xzr\n\t"
"mov v2.d[1], x10\n\t"
"mov v2.d[0], x11\n\t"
"rev64 v2.16b, v2.16b\n\t"
"rev32 v2.16b, v2.16b\n\t"
"rev64 v2.4s, v2.4s\n\t"
"sub w8, %w[nr], #2\n\t"
"\n"
"L_AES_CTR_encrypt_NEON_loop_nr_2_%=:\n\t"
Expand Down Expand Up @@ -45621,8 +45613,7 @@ void AES_CTR_encrypt_NEON(const unsigned char* in, unsigned char* out,
"adc x11, x11, xzr\n\t"
"mov v2.d[1], x10\n\t"
"mov v2.d[0], x11\n\t"
"rev64 v2.16b, v2.16b\n\t"
"rev32 v2.16b, v2.16b\n\t"
"rev64 v2.4s, v2.4s\n\t"
"\n"
"L_AES_CTR_encrypt_NEON_data_done_%=:\n\t"
"rev32 v2.16b, v2.16b\n\t"
Expand Down Expand Up @@ -49988,8 +49979,7 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i,
"mov x17, #0x87\n\t"
"ands w19, %w[sz], #15\n\t"
"cset w16, ne\n\t"
"lsl w16, w16, #4\n\t"
"sub %w[sz], %w[sz], w16\n\t"
"sub %w[sz], %w[sz], w16, lsl 4\n\t"
"ld1 {v2.2d}, [%x[i]]\n\t"
"ld1 {v4.2d}, [%x[key2]]\n\t"
"rev32 v2.16b, v2.16b\n\t"
Expand Down Expand Up @@ -51831,11 +51821,10 @@ void AES_set_encrypt_key(const unsigned char* key, word32 len,
"stp w6, w7, [%x[ks]]\n\t"
"stnp w8, w9, [%x[ks], #8]\n\t"
"sub %x[ks], %x[ks], #16\n\t"
"mov w3, w9\n\t"
"ubfx w6, w3, #8, #8\n\t"
"ubfx w7, w3, #16, #8\n\t"
"ubfx w8, w3, #24, #8\n\t"
"ubfx w3, w3, #0, #8\n\t"
"ubfx w6, w9, #8, #8\n\t"
"ubfx w7, w9, #16, #8\n\t"
"ubfx w8, w9, #24, #8\n\t"
"ubfx w3, w9, #0, #8\n\t"
"lsl w6, w6, #2\n\t"
"lsl w7, w7, #2\n\t"
"lsl w8, w8, #2\n\t"
Expand Down Expand Up @@ -55618,8 +55607,7 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i,
"add x29, sp, #0\n\t"
"ands w11, %w[sz], #15\n\t"
"cset w11, ne\n\t"
"lsl w11, w11, #4\n\t"
"sub %w[sz], %w[sz], w11\n\t"
"sub %w[sz], %w[sz], w11, lsl 4\n\t"
"mov x11, #0x87\n\t"
"mov x28, %x[key2]\n\t"
"ldp x23, x24, [%x[i]]\n\t"
Expand Down
3 changes: 1 addition & 2 deletions wolfcrypt/src/port/arm/armv8-chacha-asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -1101,8 +1101,7 @@ L_chacha_use_over_arm64_byte_loop:
eor w5, w5, w4
subs x3, x3, #1
strb w5, [x1], #1
beq L_chacha_use_over_arm64_done
b L_chacha_use_over_arm64_byte_loop
bne L_chacha_use_over_arm64_byte_loop
L_chacha_use_over_arm64_done:
ret
#ifndef __APPLE__
Expand Down
3 changes: 1 addition & 2 deletions wolfcrypt/src/port/arm/armv8-chacha-asm_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -1024,8 +1024,7 @@ void wc_chacha_use_over(byte* over, byte* output, const byte* input, word32 len)
"eor w5, w5, w4\n\t"
"subs %w[len], %w[len], #1\n\t"
"strb w5, [%x[output]], #1\n\t"
"b.eq L_chacha_use_over_arm64_done_%=\n\t"
"b L_chacha_use_over_arm64_byte_loop_%=\n\t"
"b.ne L_chacha_use_over_arm64_byte_loop_%=\n\t"
"\n"
"L_chacha_use_over_arm64_done_%=:\n\t"
: [over] "+r" (over), [output] "+r" (output), [len] "+r" (len)
Expand Down
Loading